FSR 2.3.4/3.1.5 update: Fix for possible negative rcas output.

5 months ago · 89f8f0d71f
3 changed files with 21 additions and 18 deletions
--- a/Shaders/shaders/ffx_core_gpu_common.h
+++ b/Shaders/shaders/ffx_core_gpu_common.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2652,7 +2652,7 @@ FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
 /// @ingroup GPUCore
 FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 {
-   FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
 }
@@ -2669,7 +2669,7 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 /// @ingroup GPUCore
 FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 {
-   FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
 }
@@ -2686,7 +2686,7 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 /// @ingroup GPUCore
 FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
 {
-   FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
 }
--- a/Shaders/shaders/ffx_core_gpu_common_half.h
+++ b/Shaders/shaders/ffx_core_gpu_common_half.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2895,7 +2895,7 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
 /// @ingroup GPUCore
 FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 {
-   FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
 }
@@ -2912,7 +2912,7 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 /// @ingroup GPUCore
 FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 {
-   FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
 }
@@ -2929,7 +2929,7 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 /// @ingroup GPUCore
 FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
 {
-   FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+   FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
 }
--- a/Shaders/shaders/fsr1/ffx_fsr1.h
+++ b/Shaders/shaders/fsr1/ffx_fsr1.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 // 
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -748,9 +748,10 @@ void FsrEasuH(
     // Immediate constants for peak range.
     FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
     // Limiters, these need to be high precision RCPs.
-    FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
-    FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
-    FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
+    const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL));
+    FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
+    FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
+    FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
     FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
     FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
     FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
@@ -848,9 +849,10 @@ void FsrEasuH(
  // Immediate constants for peak range.
  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
  // Limiters, these need to be high precision RCPs.
- FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
- FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
- FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+ const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+ FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R) * lowerLimiterMultiplier;
+ FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G) * lowerLimiterMultiplier;
+ FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B) * lowerLimiterMultiplier;
  FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
  FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
  FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
@@ -967,9 +969,10 @@ void FsrEasuH(
  // Immediate constants for peak range.
  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
  // Limiters, these need to be high precision RCPs.
- FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
- FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
- FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+ const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+ FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R) * lowerLimiterMultiplier;
+ FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G) * lowerLimiterMultiplier;
+ FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B) * lowerLimiterMultiplier;
  FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
  FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
  FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);