From 89f8f0d71f85c8455c5a375fcfae30bbdf0e77f1 Mon Sep 17 00:00:00 2001
From: Nico de Poel
Date: Sat, 23 Aug 2025 13:05:59 +0200
Subject: [PATCH] FSR 2.3.4/3.1.5 update: Fix for possible negative rcas output.

---
Reviewer notes (free text between the three-dash marker and the first
"diff --git" line is not part of the commit message):

 * ffxLinearFromSrgb / ffxLinearFromSrgbHalf (scalar, x2 and x3 overloads):
   the threshold j.x is subtracted directly from the input value
   (value - j.x, c - j.x), and it changes from 0.04045 / 12.92 to 0.04045.
   j.y, j.z and k are unchanged.
 * ffx_fsr1.h: in the FfxFloat32, FfxFloat16 and FfxFloat16x2 variants the
   three hitMin limiters are now multiplied by lowerLimiterMultiplier, the
   saturated ratio of eL to the minimum of bL, dL, fL and hL. The hitMax
   limiters are unchanged.
 * NOTE(review): every limiter hunk header shows "void FsrEasuH(" as its
   function context although the subject talks about rcas; presumably this
   is only git's nearest-preceding-function heuristic. Confirm which
   functions are actually modified.
 * NOTE(review): eL is divided by the neighbour minimum with no visible
   guard; behaviour when that minimum is zero depends on how ffxSaturate
   treats Inf/NaN on the target compiler. Confirm this is intended.
 * NOTE(review): indentation and intra-line spacing inside the hunks are
   assumed (4-space units). If a hunk fails to apply, retry with
   "git apply --ignore-whitespace".

 Shaders/shaders/ffx_core_gpu_common.h      |  8 ++++----
 Shaders/shaders/ffx_core_gpu_common_half.h |  8 ++++----
 Shaders/shaders/fsr1/ffx_fsr1.h            | 23 ++++++++++++----------
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/Shaders/shaders/ffx_core_gpu_common.h b/Shaders/shaders/ffx_core_gpu_common.h
index 9f88c94..b9f9bc0 100644
--- a/Shaders/shaders/ffx_core_gpu_common.h
+++ b/Shaders/shaders/ffx_core_gpu_common.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2652,7 +2652,7 @@ FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value)
 /// @ingroup GPUCore
 FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
 }
@@ -2669,7 +2669,7 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
 /// @ingroup GPUCore
 FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
 }
@@ -2686,7 +2686,7 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
 /// @ingroup GPUCore
 FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
 {
-    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
 }
diff --git a/Shaders/shaders/ffx_core_gpu_common_half.h b/Shaders/shaders/ffx_core_gpu_common_half.h
index 1cb780b..ede0e68 100644
--- a/Shaders/shaders/ffx_core_gpu_common_half.h
+++ b/Shaders/shaders/ffx_core_gpu_common_half.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -2895,7 +2895,7 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
 /// @ingroup GPUCore
 FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
 }
@@ -2912,7 +2912,7 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
 /// @ingroup GPUCore
 FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
 }
@@ -2929,7 +2929,7 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
 /// @ingroup GPUCore
 FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
 {
-    FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
     FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
     return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
 }
diff --git a/Shaders/shaders/fsr1/ffx_fsr1.h b/Shaders/shaders/fsr1/ffx_fsr1.h
index 82ebf21..450882d 100644
--- a/Shaders/shaders/fsr1/ffx_fsr1.h
+++ b/Shaders/shaders/fsr1/ffx_fsr1.h
@@ -1,6 +1,6 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// Copyright (C) 2025 Advanced Micro Devices, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files(the "Software"), to deal
@@ -748,9 +748,10 @@ void FsrEasuH(
         // Immediate constants for peak range.
         FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
         // Limiters, these need to be high precision RCPs.
-        FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
-        FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
-        FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
+        const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL));
+        FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
+        FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
+        FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
         FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
         FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
         FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
@@ -848,9 +849,10 @@ void FsrEasuH(
         // Immediate constants for peak range.
         FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
         // Limiters, these need to be high precision RCPs.
-        FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
-        FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
-        FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+        const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+        FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R) * lowerLimiterMultiplier;
+        FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G) * lowerLimiterMultiplier;
+        FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B) * lowerLimiterMultiplier;
         FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
         FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
         FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
@@ -967,9 +969,10 @@ void FsrEasuH(
         // Immediate constants for peak range.
         FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
         // Limiters, these need to be high precision RCPs.
-        FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
-        FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
-        FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+        const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL));
+        FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R) * lowerLimiterMultiplier;
+        FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G) * lowerLimiterMultiplier;
+        FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B) * lowerLimiterMultiplier;
         FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
         FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
         FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);