Browse Source

FSR 2.3.4/3.1.5 update: Fix for possible negative rcas output.

fsr3.1.4
Nico de Poel 5 months ago
parent
commit
89f8f0d71f
  1. 8
      Shaders/shaders/ffx_core_gpu_common.h
  2. 8
      Shaders/shaders/ffx_core_gpu_common_half.h
  3. 23
      Shaders/shaders/fsr1/ffx_fsr1.h

8
Shaders/shaders/ffx_core_gpu_common.h

@@ -1,6 +1,6 @@
// This file is part of the FidelityFX SDK. // This file is part of the FidelityFX SDK.
// //
// Copyright (C) 2024 Advanced Micro Devices, Inc.
// Copyright (C) 2025 Advanced Micro Devices, Inc.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@@ -2652,7 +2652,7 @@ FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
{ {
FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
} }
@@ -2669,7 +2669,7 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
{ {
FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
} }
@@ -2686,7 +2686,7 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
{ {
FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);;
FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
} }

8
Shaders/shaders/ffx_core_gpu_common_half.h

@@ -1,6 +1,6 @@
// This file is part of the FidelityFX SDK. // This file is part of the FidelityFX SDK.
// //
// Copyright (C) 2024 Advanced Micro Devices, Inc.
// Copyright (C) 2025 Advanced Micro Devices, Inc.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@@ -2895,7 +2895,7 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
{ {
FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
} }
@@ -2912,7 +2912,7 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
{ {
FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
} }
@@ -2929,7 +2929,7 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
/// @ingroup GPUCore /// @ingroup GPUCore
FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
{ {
FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
} }

23
Shaders/shaders/fsr1/ffx_fsr1.h

@@ -1,6 +1,6 @@
// This file is part of the FidelityFX SDK. // This file is part of the FidelityFX SDK.
// //
// Copyright (C) 2024 Advanced Micro Devices, Inc.
// Copyright (C) 2025 Advanced Micro Devices, Inc.
// //
// Permission is hereby granted, free of charge, to any person obtaining a copy // Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal // of this software and associated documentation files(the "Software"), to deal
@@ -748,9 +748,10 @@ void FsrEasuH(
// Immediate constants for peak range. // Immediate constants for peak range.
FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
// Limiters, these need to be high precision RCPs. // Limiters, these need to be high precision RCPs.
FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL));
FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
@@ -848,9 +849,10 @@ void FsrEasuH(
// Immediate constants for peak range. // Immediate constants for peak range.
FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
// Limiters, these need to be high precision RCPs. // Limiters, these need to be high precision RCPs.
FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R) * lowerLimiterMultiplier;
FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G) * lowerLimiterMultiplier;
FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B) * lowerLimiterMultiplier;
FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
@@ -967,9 +969,10 @@ void FsrEasuH(
// Immediate constants for peak range. // Immediate constants for peak range.
FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
// Limiters, these need to be high precision RCPs. // Limiters, these need to be high precision RCPs.
FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R) * lowerLimiterMultiplier;
FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G) * lowerLimiterMultiplier;
FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B) * lowerLimiterMultiplier;
FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y); FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);

Loading…
Cancel
Save