From 89f8f0d71f85c8455c5a375fcfae30bbdf0e77f1 Mon Sep 17 00:00:00 2001
From: Nico de Poel <ndepoel@gmail.com>
Date: Sat, 23 Aug 2025 13:05:59 +0200
Subject: [PATCH] FSR 2.3.4/3.1.5 update: Fix for possible negative rcas
 output.

---
 Shaders/shaders/ffx_core_gpu_common.h      |  8 ++++----
 Shaders/shaders/ffx_core_gpu_common_half.h |  8 ++++----
 Shaders/shaders/fsr1/ffx_fsr1.h            | 23 ++++++++++++----------
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/Shaders/shaders/ffx_core_gpu_common.h b/Shaders/shaders/ffx_core_gpu_common.h
index 9f88c94..b9f9bc0 100644
--- a/Shaders/shaders/ffx_core_gpu_common.h
+++ b/Shaders/shaders/ffx_core_gpu_common.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2652,7 +2652,7 @@ FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
 /// @ingroup GPUCore
 FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from value for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
 }
@@ -2669,7 +2669,7 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 /// @ingroup GPUCore
 FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from value for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
 }
@@ -2686,7 +2686,7 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 /// @ingroup GPUCore
 FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from value for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
 }
diff --git a/Shaders/shaders/ffx_core_gpu_common_half.h b/Shaders/shaders/ffx_core_gpu_common_half.h
index 1cb780b..ede0e68 100644
--- a/Shaders/shaders/ffx_core_gpu_common_half.h
+++ b/Shaders/shaders/ffx_core_gpu_common_half.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2895,7 +2895,7 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
 /// @ingroup GPUCore
 FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from c for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
 }
@@ -2912,7 +2912,7 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 /// @ingroup GPUCore
 FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from c for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
 }
@@ -2929,7 +2929,7 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 /// @ingroup GPUCore
 FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); // x: cutoff subtracted from c for the select, y: linear-branch scale, z: pow-branch exponent
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
 }
diff --git a/Shaders/shaders/fsr1/ffx_fsr1.h b/Shaders/shaders/fsr1/ffx_fsr1.h
index 82ebf21..450882d 100644
--- a/Shaders/shaders/fsr1/ffx_fsr1.h
+++ b/Shaders/shaders/fsr1/ffx_fsr1.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -748,9 +748,10 @@ void FsrEasuH(
      // Immediate constants for peak range.
      FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
      // Limiters, these need to be high precision RCPs.
-     FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
-     FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
-     FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
+     const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL));
+     FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
+     FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
+     FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
      FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
      FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
      FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
@@ -848,9 +849,10 @@ void FsrEasuH(
   // Immediate constants for peak range.
   FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
   // Limiters, these need to be high precision RCPs.
-  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
-  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
-  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+  const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R) * lowerLimiterMultiplier;
+  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G) * lowerLimiterMultiplier;
+  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B) * lowerLimiterMultiplier;
   FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
   FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
   FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
@@ -967,9 +969,10 @@ void FsrEasuH(
   // Immediate constants for peak range.
   FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
   // Limiters, these need to be high precision RCPs.
-  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
-  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
-  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+  const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R) * lowerLimiterMultiplier;
+  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G) * lowerLimiterMultiplier;
+  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B) * lowerLimiterMultiplier;
   FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
   FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
   FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);