From 38a2164a52444056a87c8cb6149bc6221dffb2f6 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 17 Feb 2023 12:54:23 +0100 Subject: [PATCH] Updated shader sources to FSR 2.2 --- .../Resources/FSR2/shaders/ffx_common_types.h | 2 +- Assets/Resources/FSR2/shaders/ffx_core.h | 2 +- Assets/Resources/FSR2/shaders/ffx_core_cpu.h | 2 +- Assets/Resources/FSR2/shaders/ffx_core_glsl.h | 109 ++- .../FSR2/shaders/ffx_core_gpu_common.h | 2 +- .../FSR2/shaders/ffx_core_gpu_common_half.h | 2 +- Assets/Resources/FSR2/shaders/ffx_core_hlsl.h | 114 ++- .../FSR2/shaders/ffx_core_portability.h | 2 +- .../FSR2/shaders/ffx_fsr2_accumulate.h | 307 ++++---- .../shaders/ffx_fsr2_accumulate_pass.glsl | 46 +- .../shaders/ffx_fsr2_accumulate_pass.hlsl | 43 +- .../ffx_fsr2_autogen_reactive_pass.glsl | 22 +- .../ffx_fsr2_autogen_reactive_pass.hlsl | 24 +- .../FSR2/shaders/ffx_fsr2_callbacks_glsl.h | 628 ++++++++------- .../FSR2/shaders/ffx_fsr2_callbacks_hlsl.h | 743 ++++++++---------- .../Resources/FSR2/shaders/ffx_fsr2_common.h | 347 +++++--- .../ffx_fsr2_compute_luminance_pyramid.h | 15 +- ...x_fsr2_compute_luminance_pyramid_pass.glsl | 47 +- ...x_fsr2_compute_luminance_pyramid_pass.hlsl | 75 +- .../FSR2/shaders/ffx_fsr2_depth_clip.h | 251 ++++-- .../shaders/ffx_fsr2_depth_clip_pass.glsl | 25 +- .../shaders/ffx_fsr2_depth_clip_pass.hlsl | 21 +- Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h | 59 +- .../FSR2/shaders/ffx_fsr2_lock_pass.glsl | 17 +- .../FSR2/shaders/ffx_fsr2_lock_pass.hlsl | 15 +- .../ffx_fsr2_postprocess_lock_status.h | 78 +- Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h | 58 +- .../FSR2/shaders/ffx_fsr2_rcas_pass.glsl | 20 +- .../FSR2/shaders/ffx_fsr2_rcas_pass.hlsl | 19 +- ...ruct_dilated_velocity_and_previous_depth.h | 121 +-- ..._fsr2_reconstruct_previous_depth_pass.glsl | 29 +- ..._fsr2_reconstruct_previous_depth_pass.hlsl | 21 +- .../FSR2/shaders/ffx_fsr2_reproject.h | 57 +- .../FSR2/shaders/ffx_fsr2_resources.h | 119 +-- .../Resources/FSR2/shaders/ffx_fsr2_sample.h | 
23 +- .../FSR2/shaders/ffx_fsr2_tcr_autogen.h | 250 ++++++ .../FSR2/shaders/ffx_fsr2_tcr_autogen.h.meta | 27 + .../shaders/ffx_fsr2_tcr_autogen_pass.glsl | 116 +++ .../ffx_fsr2_tcr_autogen_pass.glsl.meta | 7 + .../shaders/ffx_fsr2_tcr_autogen_pass.hlsl | 114 +++ .../ffx_fsr2_tcr_autogen_pass.hlsl.meta | 7 + .../FSR2/shaders/ffx_fsr2_upsample.h | 158 ++-- Assets/Resources/FSR2/shaders/ffx_spd.h | 2 +- 43 files changed, 2461 insertions(+), 1685 deletions(-) create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h.meta create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl.meta create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl create mode 100644 Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta diff --git a/Assets/Resources/FSR2/shaders/ffx_common_types.h b/Assets/Resources/FSR2/shaders/ffx_common_types.h index cf6ba99..ddd1786 100644 --- a/Assets/Resources/FSR2/shaders/ffx_common_types.h +++ b/Assets/Resources/FSR2/shaders/ffx_common_types.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_core.h b/Assets/Resources/FSR2/shaders/ffx_core.h index 3a66f44..4e687d6 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core.h +++ b/Assets/Resources/FSR2/shaders/ffx_core.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_core_cpu.h b/Assets/Resources/FSR2/shaders/ffx_core_cpu.h index 9bb9915..3bf0295 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_cpu.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_cpu.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_core_glsl.h b/Assets/Resources/FSR2/shaders/ffx_core_glsl.h index e419e39..6ec58f3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_glsl.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_glsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -829,6 +829,79 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) return max(min(x, y), min(max(x, y), z)); } +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. 
+/// +/// @ingroup GLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on +/// GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup GLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + + /// Compute the minimum of three values. 
/// /// NOTE: This function should compile down to a single V_MIN3_F32 operation on @@ -1400,6 +1473,40 @@ FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a) return mix(x, y, a); } //------------------------------------------------------------------------------------------------------------------------------ +// No packed version of ffxMed3. +FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ // No packed version of ffxMax3. FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { diff --git a/Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h b/Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h index 3a49c55..ae07642 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_gpu_common.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc.
All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h b/Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h index 63105be..c46ccb3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_gpu_common_half.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_core_hlsl.h b/Assets/Resources/FSR2/shaders/ffx_core_hlsl.h index f114687..ad4ff65 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_hlsl.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_hlsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -916,9 +916,81 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) return max(min(x, y), min(max(x, y), z)); } +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation.
+/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation.
+/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -935,7 +1007,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -952,7 +1024,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. 
@@ -1268,6 +1340,40 @@ FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) return min(x, min(y, z)); } //------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) { return rcp(x); diff --git a/Assets/Resources/FSR2/shaders/ffx_core_portability.h b/Assets/Resources/FSR2/shaders/ffx_core_portability.h index f0d3fd7..45be059 100644 --- a/Assets/Resources/FSR2/shaders/ffx_core_portability.h +++ b/Assets/Resources/FSR2/shaders/ffx_core_portability.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h index d0c5eae..1c5cd16 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,8 +22,6 @@ #ifndef FFX_FSR2_ACCUMULATE_H #define FFX_FSR2_ACCUMULATE_H -#define FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES 1 - FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) { return length(fMotionVector * DisplaySize()); @@ -35,31 +33,41 @@ FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) } #endif -void Accumulate(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT FfxFloat32x4 fHistory, FFX_PARAMETER_IN FfxFloat32x4 fUpsampled, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, FFX_PARAMETER_IN FfxFloat32 fHrVelocity) +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) { - fHistory.w = fHistory.w + fUpsampled.w; + // Avoid invalid values when accumulation and upsampled weight is 0 + fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); - fUpsampled.rgb = YCoCgToRGB(fUpsampled.rgb); +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) +
fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz))); + fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor))); +#endif - const FfxFloat32 fAlpha = fUpsampled.w / fHistory.w; - fHistory.rgb = ffxLerp(fHistory.rgb, fUpsampled.rgb, fAlpha); + const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; + fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha); - FfxFloat32 fMaxAverageWeight = FfxFloat32(ffxLerp(MaxAccumulationWeight(), accumulationMaxOnMotion, ffxSaturate(fHrVelocity * 10.0f))); - fHistory.w = ffxMin(fHistory.w, fMaxAverageWeight); + fHistoryColor = YCoCgToRGB(fHistoryColor); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fHistoryColor = InverseTonemap(fHistoryColor); +#endif } void RectifyHistory( - RectificationBoxData clippingBox, - inout FfxFloat32x4 fHistory, - FFX_PARAMETER_IN FfxFloat32x3 fLockStatus, - FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, - FFX_PARAMETER_IN FfxFloat32 fLumaStabilityFactor, - FFX_PARAMETER_IN FfxFloat32 fLuminanceDiff, - FFX_PARAMETER_IN FfxFloat32 fUpsampleWeight, - FFX_PARAMETER_IN FfxFloat32 fLockContributionThisFrame) + const AccumulationPassCommonParams params, + RectificationBox clippingBox, + FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, + FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation, + FfxFloat32 fLockContributionThisFrame, + FfxFloat32 fTemporalReactiveFactor, + FfxFloat32 fLumaInstabilityFactor) { - FfxFloat32 fScaleFactorInfluence = FfxFloat32(1.0f / DownscaleFactor().x - 1); - FfxFloat32 fBoxScale = FfxFloat32(1.0f) + (FfxFloat32(0.5f) * fScaleFactorInfluence); + FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f)); + + const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); + const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); + FfxFloat32 fBoxScale = 
ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT); FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec; @@ -70,26 +78,22 @@ void RectifyHistory( boxMin = ffxMax(clippingBox.aabbMin, boxMin); boxMax = ffxMin(clippingBox.aabbMax, boxMax); - FfxFloat32x3 distToClampOutside = ffxMax(ffxMax(FfxFloat32x3(0, 0, 0), boxMin - fHistory.xyz), ffxMax(FfxFloat32x3(0, 0, 0), fHistory.xyz - boxMax)); - - if (any(FFX_GREATER_THAN(distToClampOutside, FfxFloat32x3(0, 0, 0)))) { + if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) { - const FfxFloat32x3 clampedHistorySample = clamp(fHistory.xyz, boxMin, boxMax); + const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax); - FfxFloat32x3 clippedHistoryToBoxCenter = abs(clampedHistorySample - boxCenter); - FfxFloat32x3 historyToBoxCenter = abs(fHistory.xyz - boxCenter); - FfxFloat32x3 HistoryColorWeight; - HistoryColorWeight.x = historyToBoxCenter.x > FfxFloat32(0) ? clippedHistoryToBoxCenter.x / historyToBoxCenter.x : FfxFloat32(0.0f); - HistoryColorWeight.y = historyToBoxCenter.y > FfxFloat32(0) ? clippedHistoryToBoxCenter.y / historyToBoxCenter.y : FfxFloat32(0.0f); - HistoryColorWeight.z = historyToBoxCenter.z > FfxFloat32(0) ? 
clippedHistoryToBoxCenter.z / historyToBoxCenter.z : FfxFloat32(0.0f); + FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx; + + const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor; + const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); + fHistoryContribution *= fReactiveContribution; - FfxFloat32x3 fHistoryContribution = HistoryColorWeight; + // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection + fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); - // only lock luma - fHistoryContribution += ffxMax(fLockContributionThisFrame, fLumaStabilityFactor).xxx; - fHistoryContribution *= (fDepthClipFactor * fDepthClipFactor); - - fHistory.xyz = ffxLerp(clampedHistorySample.xyz, fHistory.xyz, ffxSaturate(fHistoryContribution)); + // Scale accumulation using rectification info + const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)); + fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution)); } } @@ -98,166 +102,189 @@ void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) StoreUpscaledOutput(iPxHrPos, fUpscaledColor); } -FfxFloat32 GetLumaStabilityFactor(FfxFloat32x2 fHrUv, FfxFloat32 fHrVelocity) +void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight) { - FfxFloat32 fLumaStabilityFactor = SampleLumaStabilityFactor(fHrUv); - - // Only apply on still, have to reproject luma history resource if we want it to work on motion - fLumaStabilityFactor *= FfxFloat32(fHrVelocity < 0.1f); + // we expect similar motion for next frame + // kill lock if that location is outside screen, avoid locks to be clamped to screen borders + FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + if 
(IsUvInside(fEstimatedUvNextFrame) == false) { + KillLock(fLockStatus); + } + else { + // Decrease lock lifetime + const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame); + const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); + fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + } - return fLumaStabilityFactor; + StoreLockStatus(params.iPxHrPos, fLockStatus); } -FfxFloat32 GetLockContributionThisFrame(FfxFloat32x2 fUvCoord, FfxFloat32 fAccumulationMask, FfxFloat32 fParticleMask, FfxFloat32x3 fLockStatus) + +FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) { - const FfxFloat32 fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + // Always assume max accumulation was reached + FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); - // Rectify on lock frame - FfxFloat32 fLockContributionThisFrame = ffxSaturate(fNormalizedLockLifetime * FfxFloat32(4)); + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); - return fLockContributionThisFrame; + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); + + return fBaseAccumulation.xxx; } -void FinalizeLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x3 fLockStatus, FfxFloat32 fUpsampledWeight) +FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 
fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff) { - // Increase trust - const FfxFloat32 fTrustIncreaseLanczosMax = FfxFloat32(12); // same increase no matter the MaxAccumulationWeight() value. - const FfxFloat32 fTrustIncrease = FfxFloat32(fUpsampledWeight / fTrustIncreaseLanczosMax); - fLockStatus[LOCK_TRUST] = ffxMin(FfxFloat32(1), fLockStatus[LOCK_TRUST] + fTrustIncrease); + const FfxInt32 N_MINUS_1 = 0; + const FfxInt32 N_MINUS_2 = 1; + const FfxInt32 N_MINUS_3 = 2; + const FfxInt32 N_MINUS_4 = 3; - // Decrease lock lifetime - const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(averageLanczosWeightPerFrame); - const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); - fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; - StoreLockStatus(iPxHrPos, fLockStatus); -} +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); +#endif -FfxFloat32 ComputeMaxAccumulationWeight(FfxFloat32 fHrVelocity, FfxFloat32 fReactiveMax, FfxFloat32 fDepthClipFactor, FfxFloat32 fLuminanceDiff, LockState lockState) { + fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; - FfxFloat32 normalizedMinimum = FfxFloat32(accumulationMaxOnMotion) / FfxFloat32(MaxAccumulationWeight()); + const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); + FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? 
SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f); - FfxFloat32 fReactiveMaxAccumulationWeight = FfxFloat32(1) - fReactiveMax; - FfxFloat32 fMotionMaxAccumulationWeight = ffxLerp(FfxFloat32(1), normalizedMinimum, ffxSaturate(fHrVelocity * FfxFloat32(10))); - FfxFloat32 fDepthClipMaxAccumulationWeight = fDepthClipFactor; + FfxFloat32 fLumaInstability = 0.0f; + FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]); - FfxFloat32 fLuminanceDiffMaxAccumulationWeight = ffxSaturate(ffxMax(normalizedMinimum, FfxFloat32(1) - fLuminanceDiff)); + FfxFloat32 fMin = abs(fDiffs0); - FfxFloat32 maxAccumulation = FfxFloat32(MaxAccumulationWeight()) * ffxMin( - ffxMin(fReactiveMaxAccumulationWeight, fMotionMaxAccumulationWeight), - ffxMin(fDepthClipMaxAccumulationWeight, fLuminanceDiffMaxAccumulationWeight) - ); + if (fMin >= (1.0f / 255.0f)) { + for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { + FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); - return (lockState.NewLock && !lockState.WasLockedPrevFrame) ? 
FfxFloat32(accumulationMaxOnMotion) : maxAccumulation; -} + if (sign(fDiffs0) == sign(fDiffs1)) { + + // Scale difference to protect historically similar values + const FfxFloat32 fMinBias = 1.0f; + fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias); + } + } + + fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)); -FfxFloat32x2 ComputeKernelWeight(in FfxFloat32 fHistoryWeight, in FfxFloat32 fDepthClipFactor, in FfxFloat32 fReactivityFactor) { - FfxFloat32 fKernelSizeBias = ffxSaturate(ffxMax(FfxFloat32(0), fHistoryWeight - FfxFloat32(0.5)) / FfxFloat32(3)); + fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 3.0f)); + fLumaInstability *= ffxLerp(1.0f, 0.0f, ffxSaturate(params.fHrVelocity / 20.0f)); + } - FfxFloat32 fOneMinusReactiveMax = FfxFloat32(1) - fReactivityFactor; - FfxFloat32x2 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)) * FfxFloat32(fKernelSizeBias) * fOneMinusReactiveMax; + //shift history + fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3]; + fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2]; + fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1]; + fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma; - //average value on disocclusion, to help decrease high value sample importance wait for accumulation to kick in - fKernelWeight *= FfxFloat32x2(0.5f, 0.5f) + fDepthClipFactor * FfxFloat32x2(0.5f, 0.5f); + StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory); - return ffxMin(FfxFloat32x2(1.99f, 1.99f), fKernelWeight); + return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); } -void Accumulate(FfxInt32x2 iPxHrPos) +FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) { - const FfxFloat32x2 fSamplePosHr = iPxHrPos + 0.5f; - const FfxFloat32x2 fPxLrPos = fSamplePosHr * DownscaleFactor(); // 
Source resolution output pixel center position - const FfxInt32x2 iPxLrPos = FfxInt32x2(floor(fPxLrPos)); // TODO: what about weird upscale factors... - - const FfxFloat32x2 fSamplePosUnjitterLr = (FfxFloat32x2(iPxLrPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor); - const FfxFloat32x2 fLrUvJittered = (fPxLrPos + Jitter()) / RenderSize(); + fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); - const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - const FfxFloat32x2 fMotionVector = GetMotionVector(iPxHrPos, fHrUv); - - const FfxFloat32 fHrVelocity = GetPxHrVelocity(fMotionVector); - const FfxFloat32 fDepthClipFactor = ffxSaturate(SampleDepthClip(fLrUvJittered)); - const FfxFloat32 fLumaStabilityFactor = GetLumaStabilityFactor(fHrUv, fHrVelocity); - const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fLrUvJittered); - const FfxFloat32 fReactiveMax = fDilatedReactiveMasks.x; - const FfxFloat32 fAccumulationMask = fDilatedReactiveMasks.y; - const FfxBoolean bIsResetFrame = (0 == FrameIndex()); - - FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0, 0, 0, 0); - FfxFloat32x3 fLockStatus; - InitializeNewLockSample(fLockStatus); - FfxBoolean bIsExistingSample = FFX_TRUE; + fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); - FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0); - ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample); + // Force reactive factor for new samples + fNewFactor = params.bIsNewSample ? 
1.0f : fNewFactor; - if (bIsExistingSample && !bIsResetFrame) { - ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight); - ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus); + if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) { + fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f; } + + return fNewFactor; +} - FfxFloat32 fLuminanceDiff = FfxFloat32(0.0f); +AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) +{ + AccumulationPassCommonParams params; - LockState lockState = PostProcessLockStatus(iPxHrPos, fLrUvJittered, FfxFloat32(fDepthClipFactor), fAccumulationMask, fHrVelocity, fHistoryColorAndWeight.w, fLockStatus, fLuminanceDiff); + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); + params.fHrUv = fHrUv; + + const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize()); - fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, ComputeMaxAccumulationWeight( - FfxFloat32(fHrVelocity), fReactiveMax, FfxFloat32(fDepthClipFactor), FfxFloat32(fLuminanceDiff), lockState - )); + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); - const FfxFloat32 fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); - // Kill accumulation based on shading change - fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, FfxFloat32(ffxMax(0.0f, MaxAccumulationWeight() * ffxPow(FfxFloat32(1) - fLuminanceDiff, 2.0f / 1.0f)))); + params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)); + + const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; + params.fAccumulationMask = fDilatedReactiveMasks.y; + 
params.bIsResetFrame = (0 == FrameIndex()); - // Load upsampled input color - RectificationBoxData clippingBox; + params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame); - FfxFloat32 fKernelBias = fAccumulationMask * ffxSaturate(ffxMax(0.0f, fHistoryColorAndWeight.w - 0.5f) / 3.0f); + return params; +} - FfxFloat32 fReactiveWeighted = 0; +void Accumulate(FfxInt32x2 iPxHrPos) +{ + const AccumulationPassCommonParams params = InitParams(iPxHrPos); - // No trust in reactive areas - fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(1.0f) - FfxFloat32(pow(fReactiveMax, 1.0f / 3.0f))); - fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], FfxFloat32(fDepthClipFactor)); + FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); + FfxFloat32x2 fLockStatus; + InitializeNewLockSample(fLockStatus); - FfxFloat32x2 fKernelWeight = ComputeKernelWeight(fHistoryColorAndWeight.w, FfxFloat32(fDepthClipFactor), ffxMax((FfxFloat32(1) - fLockStatus[LOCK_TRUST]), fReactiveMax)); + FfxFloat32 fTemporalReactiveFactor = 0.0f; + FfxBoolean bInMotionLastFrame = FFX_FALSE; + LockState lockState = { FFX_FALSE , FFX_FALSE }; + if (params.bIsExistingSample && !params.bIsResetFrame) { + ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame); + lockState = ReprojectHistoryLockStatus(params, fLockStatus); + } - FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(iPxHrPos, fKernelWeight, clippingBox); + FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); -#if FFX_FSR2_OPTION_GUARANTEE_UPSAMPLE_WEIGHT_ON_NEW_SAMPLES - // Make sure all samples have same weight on reset/first frame. Upsampled weight should never be 0.0f when history accumulation is 0.0f. - fUpsampledColorAndWeight.w = (fHistoryColorAndWeight.w == 0.0f) ? 
ffxMax(FSR2_EPSILON, fUpsampledColorAndWeight.w) : fUpsampledColorAndWeight.w; -#endif + FfxFloat32 fLuminanceDiff = 0.0f; + FfxFloat32 fLockContributionThisFrame = 0.0f; + UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff); - FfxFloat32 fLockContributionThisFrame = GetLockContributionThisFrame(fHrUv, fAccumulationMask, fReactiveMax, fLockStatus); + // Load upsampled input color + RectificationBox clippingBox; + FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); + + const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff); - // Update accumulation and rectify history - if (fHistoryColorAndWeight.w > FfxFloat32(0)) { - RectifyHistory(clippingBox, fHistoryColorAndWeight, fLockStatus, FfxFloat32(fDepthClipFactor), FfxFloat32(fLumaStabilityFactor), FfxFloat32(fLuminanceDiff), fUpsampledColorAndWeight.w, fLockContributionThisFrame); + FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState); - fHistoryColorAndWeight.rgb = YCoCgToRGB(fHistoryColorAndWeight.rgb); + if (params.bIsNewSample) { + fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz); } + else { + RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); - Accumulate(iPxHrPos, fHistoryColorAndWeight, fUpsampledColorAndWeight, fDepthClipFactor, fHrVelocity); + Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight); + } - //Subtract accumulation weight in reactive areas - fHistoryColorAndWeight.w -= fUpsampledColorAndWeight.w * fReactiveMax; + fHistoryColor = UnprepareRgb(fHistoryColor, Exposure()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - fHistoryColorAndWeight.rgb = 
InverseTonemap(fHistoryColorAndWeight.rgb); -#endif - fHistoryColorAndWeight.rgb /= FfxFloat32(Exposure()); + FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w); - FinalizeLockStatus(iPxHrPos, fLockStatus, fUpsampledColorAndWeight.w); + // Get new temporal reactive factor + fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor); - StoreInternalColorAndWeight(iPxHrPos, fHistoryColorAndWeight); + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor)); // Output final color when RCAS is disabled #if FFX_FSR2_OPTION_APPLY_SHARPENING == 0 - WriteUpscaledOutput(iPxHrPos, fHistoryColorAndWeight.rgb); + WriteUpscaledOutput(iPxHrPos, fHistoryColor); #endif + StoreNewLocks(iPxHrPos, 0); } #endif // FFX_FSR2_ACCUMULATE_H diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl index e1ee116..6006fd0 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,50 +19,38 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 5 -// SRV 4 : FSR2_Exposure : r_exposure -// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 12 : FSR2_DepthClip : r_depth_clip -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// SRV 14 : FSR2_LumaHistory : r_luma_history -// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut -// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut -// SRV 27 : FSR2_ReactiveMaskMax : r_reactive_max -// SRV 28 : FSR2_ExposureMips : r_imgMips -// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 #if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 #else -#define FSR2_BIND_SRV_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 #endif #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 #define FSR2_BIND_SRV_LOCK_STATUS 4 -#define FSR2_BIND_SRV_DEPTH_CLIP 5 +#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5 #define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 -#define FSR2_BIND_SRV_LUMA_HISTORY 7 +#define FSR2_BIND_SRV_LUMA_INSTABILITY 7 #define FSR2_BIND_SRV_LANCZOS_LUT 8 #define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 -#define FSR2_BIND_SRV_EXPOSURE_MIPS 10 -#define FSR2_BIND_UAV_INTERNAL_UPSCALED 11 -#define FSR2_BIND_UAV_LOCK_STATUS 12 -#define FSR2_BIND_UAV_UPSCALED_OUTPUT 13 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 
10 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 11 +#define FSR2_BIND_SRV_LUMA_HISTORY 12 + +#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13 +#define FSR2_BIND_UAV_LOCK_STATUS 14 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15 +#define FSR2_BIND_UAV_NEW_LOCKS 16 +#define FSR2_BIND_UAV_LUMA_HISTORY 17 -#define FSR2_BIND_CB_FSR2 14 +#define FSR2_BIND_CB_FSR2 18 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl index 4321f99..747f380 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_accumulate_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,43 +19,27 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 5 -// SRV 4 : FSR2_Exposure : r_exposure -// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 12 : FSR2_DepthClip : r_depth_clip -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// SRV 14 : FSR2_LumaHistory : r_luma_history -// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut -// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut -// SRV 27 : FSR2_DilatedReactiveMasks : r_dilated_reactive_masks -// SRV 28 : FSR2_ExposureMips : r_imgMips -// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 #if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 #else -#define FSR2_BIND_SRV_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 #endif #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 #define FSR2_BIND_SRV_LOCK_STATUS 4 -#define FSR2_BIND_SRV_DEPTH_CLIP 5 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 -#define FSR2_BIND_SRV_LUMA_HISTORY 7 -#define FSR2_BIND_SRV_LANCZOS_LUT 8 -#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 -#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 10 -#define FSR2_BIND_SRV_EXPOSURE_MIPS 11 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_LANCZOS_LUT 6 +#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 8 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 9 +#define FSR2_BIND_SRV_LUMA_HISTORY 10 + #define FSR2_BIND_UAV_INTERNAL_UPSCALED 0 #define FSR2_BIND_UAV_LOCK_STATUS 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 +#define 
FSR2_BIND_UAV_NEW_LOCKS 3 +#define FSR2_BIND_UAV_LUMA_HISTORY 4 #define FSR2_BIND_CB_FSR2 0 @@ -80,6 +64,7 @@ #define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] #endif // #ifndef FFX_FSR2_NUM_THREADS +FFX_FSR2_PREFER_WAVE64 FFX_FSR2_NUM_THREADS FFX_FSR2_EMBED_ROOTSIG_CONTENT void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl index b509eb0..7ae41cf 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,18 +24,18 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 -#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 -#define FSR2_BIND_UAV_REACTIVE 2 +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_UAV_AUTOREACTIVE 2 #define FSR2_BIND_CB_REACTIVE 3 #define FSR2_BIND_CB_FSR2 4 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" -layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha; -layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; -layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; +// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform 
texture2D r_input_color_pre_alpha; +// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; +// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; #ifndef FFX_FSR2_THREAD_GROUP_WIDTH @@ -51,6 +51,7 @@ layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D r #define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; #endif // #ifndef FFX_FSR2_NUM_THREADS +#if defined(FSR2_BIND_CB_REACTIVE) layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t { float scale; @@ -58,14 +59,15 @@ layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReac float binaryValue; uint flags; } cbGenerateReactive; +#endif FFX_FSR2_NUM_THREADS void main() { FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy; - FfxFloat32x3 ColorPreAlpha = texelFetch(r_input_color_pre_alpha, FfxInt32x2(uDispatchThreadId), 0).rgb; - FfxFloat32x3 ColorPostAlpha = texelFetch(r_input_color_post_alpha, FfxInt32x2(uDispatchThreadId), 0).rgb; + FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb; + FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb; if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) { @@ -87,5 +89,5 @@ void main() out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 
0 : cbGenerateReactive.binaryValue) : out_reactive_value; - imageStore(rw_output_reactive_mask, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); + imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); } diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl index 903ceae..a78a8e3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_autogen_reactive_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,18 +19,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 -#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 -#define FSR2_BIND_UAV_REACTIVE 0 +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_UAV_AUTOREACTIVE 0 + #define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_REACTIVE 1 #include "ffx_fsr2_callbacks_hlsl.h" #include "ffx_fsr2_common.h" -Texture2D r_input_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PRE_ALPHA_COLOR); -Texture2D r_input_color_post_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_POST_ALPHA_COLOR); -RWTexture2D rw_output_reactive_mask : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_REACTIVE); - #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 #endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH @@ -44,13 +42,15 @@ RWTexture2D rw_output_reactive_mask : FF #define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] #endif // #ifndef FFX_FSR2_NUM_THREADS -cbuffer cbGenerateReactive : register(b0) +#if defined(FSR2_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) { float scale; float threshold; float binaryValue; uint flags; }; +#endif FFX_FSR2_NUM_THREADS FFX_FSR2_EMBED_ROOTSIG_CONTENT @@ -58,8 +58,8 @@ void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) { uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; - float3 ColorPreAlpha = r_input_color_pre_alpha[uDispatchThreadId].rgb; - float3 ColorPostAlpha = r_input_color_post_alpha[uDispatchThreadId].rgb; + float3 ColorPreAlpha = LoadOpaqueOnly( FFX_MIN16_I2(uDispatchThreadId) ).rgb; + float3 ColorPostAlpha = LoadInputColor(uDispatchThreadId).rgb; if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) { @@ -81,5 +81,5 @@ void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) out_reactive_value = (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? 
(out_reactive_value < threshold ? 0 : binaryValue) : out_reactive_value; - rw_output_reactive_mask[uDispatchThreadId] = out_reactive_value; + rw_output_autoreactive[uDispatchThreadId] = out_reactive_value; } diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h index d598250..10da13f 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_glsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -32,49 +32,67 @@ #if defined(FSR2_BIND_CB_FSR2) layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t { - FfxInt32x2 iRenderSize; - FfxInt32x2 iDisplaySize; - FfxInt32x2 uLumaMipDimensions; - FfxInt32 uLumaMipLevelToUse; - FfxInt32 uFrameIndex; - FfxFloat32x2 fDisplaySizeRcp; - FfxFloat32x2 fJitter; - FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 depthclip_uv_scale; - FfxFloat32x2 postprocessed_lockstatus_uv_scale; - FfxFloat32x2 reactive_mask_dim_rcp; - FfxFloat32x2 MotionVectorScale; - FfxFloat32x2 fDownscaleFactor; - FfxFloat32 fPreExposure; - FfxFloat32 fTanHalfFOV; - FfxFloat32x2 fMotionVectorJitterCancellation; - FfxFloat32 fJitterSequenceLength; - FfxFloat32 fLockInitialLifetime; - FfxFloat32 fLockTickDelta; - FfxFloat32 fDeltaTime; - FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fLumaMipRcp; + FfxInt32x2 iRenderSize; + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iDisplaySize; + FfxInt32x2 iInputColorResourceDimensions; + FfxInt32x2 iLumaMipDimensions; + FfxInt32 iLumaMipLevelToUse; + FfxInt32 iFrameIndex; + + FfxFloat32x4 fDeviceToViewDepth; + FfxFloat32x2 fJitter; + FfxFloat32x2 
fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fPreExposure; + FfxFloat32 fPreviousFramePreExposure; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + FfxFloat32 fDeltaTime; + FfxFloat32 fDynamicResChangeFactor; + FfxFloat32 fViewSpaceToMetersFactor; } cbFSR2; #endif -FfxFloat32 LumaMipRcp() +FfxInt32x2 RenderSize() +{ + return cbFSR2.iRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return cbFSR2.iMaxRenderSize; +} + +FfxInt32x2 DisplaySize() +{ + return cbFSR2.iDisplaySize; +} + +FfxInt32x2 InputColorResourceDimensions() { - return cbFSR2.fLumaMipRcp; + return cbFSR2.iInputColorResourceDimensions; } FfxInt32x2 LumaMipDimensions() { - return cbFSR2.uLumaMipDimensions; + return cbFSR2.iLumaMipDimensions; } FfxInt32 LumaMipLevelToUse() { - return cbFSR2.uLumaMipLevelToUse; + return cbFSR2.iLumaMipLevelToUse; } -FfxFloat32x2 DownscaleFactor() +FfxInt32 FrameIndex() { - return cbFSR2.fDownscaleFactor; + return cbFSR2.iFrameIndex; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return cbFSR2.fDeviceToViewDepth; } FfxFloat32x2 Jitter() @@ -82,39 +100,39 @@ FfxFloat32x2 Jitter() return cbFSR2.fJitter; } -FfxFloat32x2 MotionVectorJitterCancellation() +FfxFloat32x2 MotionVectorScale() { - return cbFSR2.fMotionVectorJitterCancellation; + return cbFSR2.fMotionVectorScale; } -FfxInt32x2 RenderSize() +FfxFloat32x2 DownscaleFactor() { - return cbFSR2.iRenderSize; + return cbFSR2.fDownscaleFactor; } -FfxInt32x2 DisplaySize() +FfxFloat32x2 MotionVectorJitterCancellation() { - return cbFSR2.iDisplaySize; + return cbFSR2.fMotionVectorJitterCancellation; } -FfxFloat32x2 DisplaySizeRcp() +FfxFloat32 PreExposure() { - return cbFSR2.fDisplaySizeRcp; + return cbFSR2.fPreExposure; } -FfxFloat32 JitterSequenceLength() +FfxFloat32 PreviousFramePreExposure() { - return cbFSR2.fJitterSequenceLength; + return cbFSR2.fPreviousFramePreExposure; } -FfxFloat32 LockInitialLifetime() +FfxFloat32 
TanHalfFoV() { - return cbFSR2.fLockInitialLifetime; + return cbFSR2.fTanHalfFOV; } -FfxFloat32 LockTickDelta() +FfxFloat32 JitterSequenceLength() { - return cbFSR2.fLockTickDelta; + return cbFSR2.fJitterSequenceLength; } FfxFloat32 DeltaTime() @@ -122,38 +140,37 @@ FfxFloat32 DeltaTime() return cbFSR2.fDeltaTime; } -FfxFloat32 MaxAccumulationWeight() -{ - const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples - - return 12; //32.0f * averageLanczosWeightPerFrame; -} - FfxFloat32 DynamicResChangeFactor() { return cbFSR2.fDynamicResChangeFactor; } -FfxInt32 FrameIndex() +FfxFloat32 ViewSpaceToMetersFactor() { - return cbFSR2.uFrameIndex; + return cbFSR2.fViewSpaceToMetersFactor; } layout (set = 0, binding = 0) uniform sampler s_PointClamp; layout (set = 0, binding = 1) uniform sampler s_LinearClamp; // SRVs +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; +#endif #if defined(FSR2_BIND_SRV_INPUT_COLOR) layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; #endif -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) - layout (set = 1, binding = FSR2_BIND_SRV_MOTION_VECTORS) uniform texture2D r_motion_vectors; +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; #endif -#if defined(FSR2_BIND_SRV_DEPTH) - layout (set = 1, binding = FSR2_BIND_SRV_DEPTH) uniform texture2D r_depth; +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) + layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; #endif -#if defined(FSR2_BIND_SRV_EXPOSURE) - layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE) uniform texture2D r_exposure; +#if 
defined(FSR2_BIND_SRV_AUTO_EXPOSURE) + layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure; #endif #if defined(FSR2_BIND_SRV_REACTIVE_MASK) layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; @@ -167,6 +184,9 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; #endif +#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) + layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors; +#endif #if defined(FSR2_BIND_SRV_DILATED_DEPTH) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth; #endif @@ -176,8 +196,11 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_LOCK_STATUS) layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status; #endif -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - layout (set = 1, binding = FSR2_BIND_SRV_DEPTH_CLIP) uniform texture2D r_depth_clip; +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) + layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma; +#endif +#if defined(FSR2_BIND_SRV_NEW_LOCKS) + layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; #endif #if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color; @@ -191,8 +214,8 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_LANCZOS_LUT) layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; #endif -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) - layout (set = 1, binding = FSR2_BIND_SRV_EXPOSURE_MIPS) uniform texture2D r_imgMips; +#if 
defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) + layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips; #endif #if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut; @@ -200,444 +223,348 @@ layout (set = 0, binding = 1) uniform sampler s_LinearClamp; #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; #endif +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) + layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; +#endif +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) + layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; +#endif // UAV #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg32f) uniform image2D rw_dilated_motion_vectors; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilatedDepth; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth; #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba32f) uniform image2D rw_internal_upscaled_color; + layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - 
layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, r11f_g11f_b10f) uniform image2D rw_lock_status; + layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status; +#endif +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) + layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma; #endif -#if defined FSR2_BIND_UAV_DEPTH_CLIP - layout (set = 1, binding = FSR2_BIND_UAV_DEPTH_CLIP, r32f) uniform image2D rw_depth_clip; +#if defined FSR2_BIND_UAV_NEW_LOCKS + layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) uniform image2D rw_prepared_input_color; + layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color; #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba32f) uniform image2D rw_luma_history; + layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output; + layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r32f) coherent uniform image2D rw_img_mip_shading_change; + layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r32f) coherent uniform image2D rw_img_mip_5; + layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D 
rw_img_mip_5; #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg32f) uniform image2D rw_dilated_reactive_masks; + layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks; #endif #if defined FSR2_BIND_UAV_EXPOSURE layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; -#endif +#endif +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure; +#endif #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; #endif +#if defined FSR2_BIND_UAV_AUTOREACTIVE + layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; +#endif +#if defined FSR2_BIND_UAV_AUTOCOMPOSITION + layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; +#endif +#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR + layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha; +#endif +#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR + layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; +#endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r; -#else - return 0.f; -#endif } +#endif - +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) - fUV *= cbFSR2.depthclip_uv_scale; return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r; -#else - return 
0.f; -#endif -} - -// -// a 0 0 0 x -// 0 b 0 0 y -// 0 0 c d z -// 0 0 e 0 1 -// -// z' = (z*c+d)/(z*e) -// z' = (c/e) + d/(z*e) -// z' - (c/e) = d/(z*e) -// (z'e - c)/e = d/(z*e) -// e / (z'e - c) = (z*e)/d -// (e * d) / (z'e - c) = z*e -// z = d / (z'e - c) -FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) -{ - return -cbFSR2.fDeviceToViewDepth[2] / (fDeviceDepth * cbFSR2.fDeviceToViewDepth[1] - cbFSR2.fDeviceToViewDepth[0]); } +#endif +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH) - return texelFetch(r_depth, iPxPos, 0).r; -#else - return 0.f; -#endif + return texelFetch(r_input_depth, iPxPos, 0).r; } +#endif +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; -#else - return 0.f; -#endif -} - -FfxFloat32x4 GatherReactiveMask(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) - return textureGather(sampler2D(r_reactive_mask, s_LinearClamp), FfxFloat32x2(iPxPos) * cbFSR2.reactive_mask_dim_rcp, 0); -#else - return FfxFloat32x4(0.f); -#endif } - -FfxFloat32 LoadTransparencyAndCompositionMask(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) - return texelFetch(r_transparency_and_composition_mask, iPxPos, 0).r; -#else - return 0.f; #endif -} -FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) -{ #if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0f).x; -#else - return 0.f; -#endif -} - -FfxFloat32 PreExposure() +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { - return cbFSR2.fPreExposure; + return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r; } +#endif +#if defined(FSR2_BIND_SRV_INPUT_COLOR) 
FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_INPUT_COLOR) - return texelFetch(r_input_color_jittered, iPxPos, 0).rgb / PreExposure(); -#else - return FfxFloat32x3(0.f); -#endif + return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; } +#endif -FfxFloat32x3 LoadInputColorWithoutPreExposure(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_INPUT_COLOR) - return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; -#else - return FfxFloat32x3(0.f); -#endif +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb; } +#endif +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) - return texelFetch(r_prepared_input_color, iPxPos, 0).rgb; -#else - return FfxFloat32x3(0.f); -#endif + return texelFetch(r_prepared_input_color, iPxPos, 0).xyz; } - -FfxFloat32 LoadPreparedInputColorLuma(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) - return texelFetch(r_prepared_input_color, iPxPos, 0).a; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) { -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) - FfxFloat32x2 fSrcMotionVector = texelFetch(r_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; -#else - FfxFloat32x2 fSrcMotionVector = FfxFloat32x2(0.f); -#endif + FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * cbFSR2.MotionVectorScale; + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); #if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS - fUvMotionVector -= cbFSR2.fMotionVectorJitterCancellation; + fUvMotionVector -= MotionVectorJitterCancellation(); #endif return fUvMotionVector; } +#endif +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) 
FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory) { -#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) return texelFetch(r_internal_upscaled_color, iPxHistory, 0); -#else - return FfxFloat32x4(0.0f); -#endif } - -FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) - return imageLoad(rw_internal_upscaled_color, iPxPos); -#else - return FfxFloat32x4(0.f); #endif -} +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory); -#endif } - -FfxFloat32x4 LoadRwLumaHistory(FfxInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) - return imageLoad(rw_luma_history, FfxInt32x2(iPxPos)); -#else - return FfxFloat32x4(1.f); #endif -} -FfxFloat32 LoadLumaStabilityFactor(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LUMA_HISTORY) - return texelFetch(r_luma_history, FfxInt32x2(iPxPos), 0).w; -#else - return 0.f; -#endif -} - -FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_LUMA_HISTORY) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f).w; -#else - return 0.f; -#endif + return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f); } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) imageStore(rw_internal_upscaled_color, iPxHistory, fHistory); -#endif } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight); -#endif } +#endif +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) void 
StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor) { -#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) - imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor * PreExposure(), 1.f)); -#endif + imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f)); } +#endif -FfxFloat32x3 LoadLockStatus(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LOCK_STATUS) - FfxFloat32x3 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rgb; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; +FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos) +{ + FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg; return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif } +#endif -FfxFloat32x3 LoadRwLockStatus(FfxInt32x2 iPxPos) -{ #if defined(FSR2_BIND_UAV_LOCK_STATUS) - FfxFloat32x3 fLockStatus = imageLoad(rw_lock_status, iPxPos).rgb; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - - return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif +void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus) +{ + imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f)); } +#endif -void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x3 fLockstatus) +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) +FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_LOCK_STATUS) - fLockstatus[0] += LockInitialLifetime() * 2.0f; - - imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f)); -#endif + return texelFetch(r_lock_input_luma, iPxPos, 0).r; } +#endif -void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) +void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma) { -#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) - imageStore(rw_prepared_input_color, iPxPos, fTonemapped); -#endif + imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0)); } +#endif -FfxBoolean IsResponsivePixel(FfxInt32x2 iPxPos) +#if 
defined(FSR2_BIND_SRV_NEW_LOCKS) +FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos) { - return FFX_FALSE; //not supported in prototype + return texelFetch(r_new_locks, iPxPos, 0).r; } +#endif -FfxFloat32 LoadDepthClip(FfxInt32x2 iPxPos) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - return texelFetch(r_depth_clip, iPxPos, 0).r; -#else - return 0.f; -#endif + return imageLoad(rw_new_locks, iPxPos).r; } +#endif -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) - fUV *= cbFSR2.depthclip_uv_scale; - return textureLod(sampler2D(r_depth_clip, s_LinearClamp), fUV, 0.0f).r; -#else - return 0.f; -#endif + imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0)); } +#endif -FfxFloat32x3 SampleLockStatus(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) +void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) { -#if defined(FSR2_BIND_SRV_LOCK_STATUS) - fUV *= cbFSR2.postprocessed_lockstatus_uv_scale; - FfxFloat32x3 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rgb; - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return FfxFloat32x3(0.f); -#endif + imageStore(rw_prepared_input_color, iPxPos, fTonemapped); } +#endif -void StoreDepthClip(FfxInt32x2 iPxPos, FfxFloat32 fClip) +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_UAV_DEPTH_CLIP) - imageStore(rw_depth_clip, iPxPos, vec4(fClip, 0.0f, 0.0f, 0.0f)); -#endif + return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w; } +#endif -FfxFloat32 TanHalfFoV() +#if defined(FSR2_BIND_SRV_LOCK_STATUS) +FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) { - return cbFSR2.fTanHalfFOV; + FfxFloat32x2 fLockStatus = 
textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg; + return fLockStatus; } +#endif +#if defined(FSR2_BIND_SRV_DEPTH) FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DEPTH) - return texelFetch(r_depth, iPxInput, 0).r; -#else - return 0.f; -#endif + return texelFetch(r_input_depth, iPxInput, 0).r; } +#endif +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = floatBitsToUint(fDepth); -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) + #if FFX_FSR2_OPTION_INVERTED_DEPTH imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); #else imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth #endif -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) { -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0)); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { -#if defined(FSR2_BIND_UAV_DILATED_DEPTH) //FfxUInt32 uDepth = f32tof16(fDepth); imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) 
imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f)); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) - fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg; -#else - return FfxFloat32x2(0.f); +} #endif + +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) +FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput) +{ + return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg; } -FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV) { + return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy; +} +#endif + #if defined(FSR2_BIND_SRV_DILATED_DEPTH) +FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +{ return texelFetch(r_dilatedDepth, iPxInput, 0).r; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) FfxFloat32 Exposure() { - #if defined(FSR2_BIND_SRV_EXPOSURE) - FfxFloat32 exposure = texelFetch(r_exposure, FfxInt32x2(0,0), 0).x; - #else - FfxFloat32 exposure = 1.f; - #endif + FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x; if (exposure == 0.0f) { exposure = 1.0f; @@ -645,6 +572,20 @@ FfxFloat32 Exposure() return exposure; } +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return 
exposure; +} +#endif FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { @@ -655,41 +596,86 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) #endif } +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r); -#else - return FfxFloat32(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) - fUV *= cbFSR2.depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg; -#else - return FfxFloat32x2(0.f); -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) { -#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f)); +} +#endif + +#if defined(FFX_INTERNAL) +FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba; +} #endif + +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_opaque_only, iPxPos, 0).xyz; } +#endif +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) +FfxFloat32x3 
LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f)); + + imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f)); +} +#endif +#endif + +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f)); +} +#endif + +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f)); +} +#endif #endif // #if defined(FFX_GPU) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h index 4641927..fd722b3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_callbacks_hlsl.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -48,56 +48,32 @@ #define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) #define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) -#if defined(FSR2_BIND_CB_FSR2) +#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL) cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) { - FfxInt32x2 uRenderSize; - FfxInt32x2 uDisplaySize; - FfxInt32x2 uLumaMipDimensions; - FfxInt32 uLumaMipLevelToUse; - FfxUInt32 uFrameIndex; - FfxFloat32x2 fDisplaySizeRcp; - FfxFloat32x2 fJitter; + FfxInt32x2 iRenderSize; + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iDisplaySize; + FfxInt32x2 iInputColorResourceDimensions; + FfxInt32x2 iLumaMipDimensions; + FfxInt32 iLumaMipLevelToUse; + FfxInt32 iFrameIndex; + FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 depthclip_uv_scale; - FfxFloat32x2 postprocessed_lockstatus_uv_scale; - FfxFloat32x2 reactive_mask_dim_rcp; - FfxFloat32x2 MotionVectorScale; + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; FfxFloat32x2 fDownscaleFactor; + FfxFloat32x2 fMotionVectorJitterCancellation; FfxFloat32 fPreExposure; + FfxFloat32 fPreviousFramePreExposure; FfxFloat32 fTanHalfFOV; - FfxFloat32x2 fMotionVectorJitterCancellation; FfxFloat32 fJitterSequenceLength; - FfxFloat32 fLockInitialLifetime; - FfxFloat32 fLockTickDelta; FfxFloat32 fDeltaTime; FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fLumaMipRcp; -#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 36 // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. 
+ FfxFloat32 fViewSpaceToMetersFactor; }; -#else - #define iRenderSize 0 - #define iDisplaySize 0 - #define iLumaMipDimensions 0 - #define iLumaMipLevelToUse 0 - #define iFrameIndex 0 - #define fDisplaySizeRcp 0 - #define fJitter 0 - #define fDeviceToViewDepth FfxFloat32x4(0,0,0,0) - #define depthclip_uv_scale 0 - #define postprocessed_lockstatus_uv_scale 0 - #define reactive_mask_dim_rcp 0 - #define MotionVectorScale 0 - #define fDownscaleFactor 0 - #define fPreExposure 0 - #define fTanHalfFOV 0 - #define fMotionVectorJitterCancellation 0 - #define fJitterSequenceLength 0 - #define fLockInitialLifetime 0 - #define fLockTickDelta 0 - #define fDeltaTime 0 - #define fDynamicResChangeFactor 0 - #define fLumaMipRcp 0 + +#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. #endif #if defined(FFX_GPU) @@ -146,25 +122,40 @@ #endif // #if FFX_FSR2_EMBED_ROOTSIG #endif // #if defined(FFX_GPU) +/* Define getter functions in the order they are defined in the CB! 
*/ +FfxInt32x2 RenderSize() +{ + return iRenderSize; +} -FfxFloat32 LumaMipRcp() +FfxInt32x2 MaxRenderSize() { - return fLumaMipRcp; + return iMaxRenderSize; +} + +FfxInt32x2 DisplaySize() +{ + return iDisplaySize; +} + +FfxInt32x2 InputColorResourceDimensions() +{ + return iInputColorResourceDimensions; } FfxInt32x2 LumaMipDimensions() { - return uLumaMipDimensions; + return iLumaMipDimensions; } FfxInt32 LumaMipLevelToUse() { - return uLumaMipLevelToUse; + return iLumaMipLevelToUse; } -FfxFloat32x2 DownscaleFactor() +FfxInt32 FrameIndex() { - return fDownscaleFactor; + return iFrameIndex; } FfxFloat32x2 Jitter() @@ -172,51 +163,49 @@ FfxFloat32x2 Jitter() return fJitter; } -FfxFloat32x2 MotionVectorJitterCancellation() +FfxFloat32x4 DeviceToViewSpaceTransformFactors() { - return fMotionVectorJitterCancellation; + return fDeviceToViewDepth; } -FfxInt32x2 RenderSize() +FfxFloat32x2 MotionVectorScale() { - return uRenderSize; + return fMotionVectorScale; } -FfxInt32x2 DisplaySize() +FfxFloat32x2 DownscaleFactor() { - return uDisplaySize; + return fDownscaleFactor; } -FfxFloat32x2 DisplaySizeRcp() +FfxFloat32x2 MotionVectorJitterCancellation() { - return fDisplaySizeRcp; + return fMotionVectorJitterCancellation; } -FfxFloat32 JitterSequenceLength() +FfxFloat32 PreExposure() { - return fJitterSequenceLength; + return fPreExposure; } -FfxFloat32 LockInitialLifetime() +FfxFloat32 PreviousFramePreExposure() { - return fLockInitialLifetime; + return fPreviousFramePreExposure; } -FfxFloat32 LockTickDelta() +FfxFloat32 TanHalfFoV() { - return fLockTickDelta; + return fTanHalfFOV; } -FfxFloat32 DeltaTime() +FfxFloat32 JitterSequenceLength() { - return fDeltaTime; + return fJitterSequenceLength; } -FfxFloat32 MaxAccumulationWeight() +FfxFloat32 DeltaTime() { - const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples - - return 12; //32.0f * averageLanczosWeightPerFrame; + return fDeltaTime; } FfxFloat32 
DynamicResChangeFactor() @@ -224,574 +213,495 @@ FfxFloat32 DynamicResChangeFactor() return fDynamicResChangeFactor; } -FfxUInt32 FrameIndex() +FfxFloat32 ViewSpaceToMetersFactor() { - return uFrameIndex; + return fViewSpaceToMetersFactor; } + SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); // SRVs #if defined(FFX_INTERNAL) + Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY); Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); - Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); - Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); - Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); + Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); + Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS); Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); Texture2D r_internal_upscaled_color : 
FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); + Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE); Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - // declarations not 
current form, no accessor functions - Texture2D r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); - Texture2D r_bias_current_color_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_BIAS_CURRENT_COLOR_MASK); - Texture2D r_gbuffer_albedo : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ALBEDO); - Texture2D r_gbuffer_roughness : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ROUGHNESS); - Texture2D r_gbuffer_metallic : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_METALLIC); - Texture2D r_gbuffer_specular : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SPECULAR); - Texture2D r_gbuffer_subsurface : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SUBSURFACE); - Texture2D r_gbuffer_normals : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_NORMALS); - Texture2D r_gbuffer_shading_mode_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SHADING_MODE_ID); - Texture2D r_gbuffer_material_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_MATERIAL_ID); - Texture2D r_motion_vectors_3d : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_3D); - Texture2D r_is_particle_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_IS_PARTICLE_MASK); - Texture2D r_animated_texture_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_ANIMATED_TEXTURE_MASK); - Texture2D r_depth_high_res : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_HIGH_RES); - Texture2D r_position_view_space : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_POSITION_VIEW_SPACE); - Texture2D r_ray_tracing_hit_distance : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RAY_TRACING_HIT_DISTANCE); - Texture2D r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); + Texture2D r_debug_out : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); // UAV declarations RWTexture2D 
rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); + RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); - RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE); - globallycoherent RWTexture2D rw_spd_global_atomic : 
FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); - RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE); + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5); + RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); + RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); + RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE); + RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION); + RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + #else // #if defined(FFX_INTERNAL) #if defined FSR2_BIND_SRV_INPUT_COLOR - Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); #endif - #if defined FSR2_BIND_SRV_MOTION_VECTORS - Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_MOTION_VECTORS); + #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY + Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); #endif - #if defined FSR2_BIND_SRV_DEPTH - Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH); + #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS + 
Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_INPUT_DEPTH + Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); #endif - #if defined FSR2_BIND_SRV_EXPOSURE - Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE); + #if defined FSR2_BIND_SRV_INPUT_EXPOSURE + Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_AUTO_EXPOSURE + Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_SRV_REACTIVE_MASK - Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); #endif #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); #endif #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS - Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS + Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_DILATED_DEPTH - Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + 
Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); #endif #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED - Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_SRV_LOCK_STATUS - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); #endif - #if defined FSR2_BIND_SRV_DEPTH_CLIP - Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); + #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA + Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_SRV_NEW_LOCKS + Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); #endif #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_LUMA_HISTORY - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); #endif #if defined FSR2_BIND_SRV_RCAS_INPUT - Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); #endif #if defined FSR2_BIND_SRV_LANCZOS_LUT - Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); #endif - #if defined FSR2_BIND_SRV_EXPOSURE_MIPS - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE_MIPS); + #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); #endif #if 
defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT - Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); #endif #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); #endif + #if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR + Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR + Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + #endif + // UAV declarations #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - RWTexture2D rw_lock_status : 
FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); #endif - #if defined FSR2_BIND_UAV_DEPTH_CLIP - RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); + #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA + RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_UAV_NEW_LOCKS + RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); + RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); 
#endif #if defined FSR2_BIND_UAV_EXPOSURE - RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_AUTO_EXPOSURE + RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC - globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + #endif + + #if defined FSR2_BIND_UAV_AUTOREACTIVE + RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); + #endif + #if defined FSR2_BIND_UAV_AUTOCOMPOSITION + RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION); + #endif + #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR + RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR + RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR); #endif #endif // #if defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) return r_imgMips.mips[mipLevel][iPxPos]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) { -#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); -#else - return 0.f; -#endif - -} - -// -// a 0 0 0 x -// 0 b 0 0 y -// 0 0 c d z -// 0 0 e 0 1 -// -// z' = (z*c+d)/(z*e) -// z' = (c/e) + d/(z*e) -// z' - (c/e) 
= d/(z*e) -// (z'e - c)/e = d/(z*e) -// e / (z'e - c) = (z*e)/d -// (e * d) / (z'e - c) = z*e -// z = d / (z'e - c) -FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) -{ - return -fDeviceToViewDepth[2] / (fDeviceDepth * fDeviceToViewDepth[1] - fDeviceToViewDepth[0]); } +#endif +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) - return r_depth[iPxPos]; -#else - return 0.f; + return r_input_depth[iPxPos]; +} #endif + +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; } +#endif +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) return r_reactive_mask[iPxPos]; -#else - return 0.f; -#endif } - -FfxFloat32x4 GatherReactiveMask(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) - return r_reactive_mask.GatherRed(s_LinearClamp, FfxFloat32x2(iPxPos) * reactive_mask_dim_rcp); -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) return r_transparency_and_composition_mask[iPxPos]; -#else - return 0.f; -#endif } - -FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) -{ -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; #endif -} - -FfxFloat32 PreExposure() -{ - return fPreExposure; -} +#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) 
FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_input_color_jittered[iPxPos].rgb / PreExposure(); -#else - return 0; -#endif + return r_input_color_jittered[iPxPos].rgb; } +#endif -FfxFloat32x3 LoadInputColorWithoutPreExposure(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_input_color_jittered[iPxPos].rgb; -#else - return 0; -#endif +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; } +#endif +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_prepared_input_color[iPxPos].rgb; -#else - return 0.f; -#endif + return r_prepared_input_color[iPxPos].xyz; } - -FfxFloat32 LoadPreparedInputColorLuma(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - return r_prepared_input_color[iPxPos].a; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) { -#if defined(FSR2_BIND_SRV_MOTION_VECTORS) || defined(FFX_INTERNAL) - FfxFloat32x2 fSrcMotionVector = r_motion_vectors[iPxDilatedMotionVectorPos].xy; -#else - FfxFloat32x2 fSrcMotionVector = 0.f; -#endif + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale; + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); #if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS - fUvMotionVector -= fMotionVectorJitterCancellation; + fUvMotionVector -= MotionVectorJitterCancellation(); #endif return fUvMotionVector; } +#endif +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) 
FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) { -#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) return r_internal_upscaled_color[iPxHistory]; -#else - return 0.f; -#endif } - -FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) - return rw_internal_upscaled_color[iPxPos]; -#else - return 0.f; #endif -} +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) rw_luma_history[iPxPos] = fLumaHistory; -#endif } - -FfxFloat32x4 LoadRwLumaHistory(FfxUInt32x2 iPxPos) -{ -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) - return rw_luma_history[iPxPos]; -#else - return 1.f; #endif -} -FfxFloat32 LoadLumaStabilityFactor(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) - return r_luma_history[iPxPos].w; -#else - return 0.f; -#endif +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); } +#endif -FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) +#if defined(FFX_INTERNAL) +FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0).w; -#else - return 0.f; -#endif + return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w; } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) rw_internal_upscaled_color[iPxHistory] = fHistory; -#endif } +#endif +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 
fColorAndWeight) { -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) rw_internal_upscaled_color[iPxPos] = fColorAndWeight; -#endif } +#endif +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) { -#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) - rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor * PreExposure(), 1.f); -#endif + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); } +#endif //LOCK_LIFETIME_REMAINING == 0 //Should make LockInitialLifetime() return a const 1.0f later -FfxFloat32x3 LoadLockStatus(FfxUInt32x2 iPxPos) -{ #if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) - FfxFloat32x3 fLockStatus = r_lock_status[iPxPos]; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return 0.f; -#endif - - -} - -FfxFloat32x3 LoadRwLockStatus(FfxUInt32x2 iPxPos) +FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) - FfxFloat32x3 fLockStatus = rw_lock_status[iPxPos]; - - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - - return fLockStatus; -#else - return 0.f; -#endif + return r_lock_status[iPxPos]; } +#endif -void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x3 fLockstatus) -{ #if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) - fLockstatus[0] += LockInitialLifetime() * 2.0f; - - rw_lock_status[iPxPos] = fLockstatus; -#endif +void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) +{ + rw_lock_status[iPxPos] = fLockStatus; } +#endif -void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) - rw_prepared_input_color[iPxPos] = fTonemapped; -#endif + return 
r_lock_input_luma[iPxPos]; } +#endif -FfxBoolean IsResponsivePixel(FfxUInt32x2 iPxPos) +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) { - return FFX_FALSE; //not supported in prototype + rw_lock_input_luma[iPxPos] = fLuma; } +#endif -FfxFloat32 LoadDepthClip(FfxUInt32x2 iPxPos) +#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL) +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) - return r_depth_clip[iPxPos]; -#else - return 0.f; -#endif + return r_new_locks[iPxPos]; } +#endif -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; - return r_depth_clip.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif + return rw_new_locks[iPxPos]; } +#endif -FfxFloat32x3 SampleLockStatus(FfxFloat32x2 fUV) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) { -#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) - fUV *= postprocessed_lockstatus_uv_scale; - FfxFloat32x3 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); - fLockStatus[0] -= LockInitialLifetime() * 2.0f; - return fLockStatus; -#else - return 0.f; -#endif + rw_new_locks[iPxPos] = newLock; } +#endif -void StoreDepthClip(FfxUInt32x2 iPxPos, FfxFloat32 fClip) +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) { -#if defined(FSR2_BIND_UAV_DEPTH_CLIP) || defined(FFX_INTERNAL) - rw_depth_clip[iPxPos] = fClip; -#endif + rw_prepared_input_color[iPxPos] = fTonemapped; } +#endif -FfxFloat32 TanHalfFoV() +#if 
defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) { - return fTanHalfFOV; + return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; } +#endif -FfxFloat32 LoadSceneDepth(FfxUInt32x2 iPxInput) +#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) +FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) - return r_depth[iPxInput]; -#else - return 0.f; -#endif + FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); + return fLockStatus; } +#endif +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); -#else - return 0; -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = asuint(fDepth); -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) + #if FFX_FSR2_OPTION_INVERTED_DEPTH InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); #else InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth #endif -#endif } +#endif +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) { -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { -#if 
defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) rw_dilatedDepth[iPxPos] = fDepth; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) rw_dilated_motion_vectors[iPxPos] = fMotionVector; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) return r_dilated_motion_vectors[iPxInput].xy; -#else - return 0.f; +} #endif + +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) +FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_previous_dilated_motion_vectors[iPxInput].xy; } -FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) - return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif + return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; } +#endif +#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) { -#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) return r_dilatedDepth[iPxInput]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL) FfxFloat32 Exposure() { - // return 1.0f; - #if defined(FSR2_BIND_SRV_EXPOSURE) || defined(FFX_INTERNAL) - FfxFloat32 exposure = r_exposure[FfxUInt32x2(0, 0)].x; - #else - FfxFloat32 exposure = 1.f; - #endif + FfxFloat32 exposure = 
r_input_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; if (exposure == 0.0f) { exposure = 1.0f; @@ -799,6 +709,7 @@ FfxFloat32 Exposure() return exposure; } +#endif FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { @@ -809,40 +720,80 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) #endif } +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { -#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) - fUV *= depthclip_uv_scale; return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) { -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) return r_dilated_reactive_masks[iPxPos]; -#else - return 0.f; -#endif } +#endif +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) { -#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; +} +#endif + +#if 
defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[iPxPos].xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_pre_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_post_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL) +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + rw_output_autoreactive[iPxPos] = fReactive.x; + + rw_output_autocomposition[iPxPos] = fReactive.y; +} +#endif +#endif + +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_pre_alpha[iPxPos] = color; + +} #endif + +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_post_alpha[iPxPos] = color; } +#endif #endif // #if defined(FFX_GPU) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_common.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_common.h index 7f6acf2..0c72aa8 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_common.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_common.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -26,12 +26,13 @@ //Locks #define LOCK_LIFETIME_REMAINING 0 #define LOCK_TEMPORAL_LUMA 1 -#define LOCK_TRUST 2 #endif // #if defined(FFX_CPU) || defined(FFX_GPU) #if defined(FFX_GPU) +FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; -FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1e-03f; +FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; @@ -43,162 +44,174 @@ FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; #pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative // Reconstructed depth usage -FFX_STATIC const FfxFloat32 reconstructedDepthBilinearWeightThreshold = 0.05f; +FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; // Accumulation -FFX_STATIC const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples -FFX_STATIC const FfxFloat32 accumulationMaxOnMotion = 4.0f; +FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f; +FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; +FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; // Auto exposure FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +struct AccumulationPassCommonParams +{ + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 fHrVelocity; + 
FfxFloat32 fDepthClipFactor; + FfxFloat32 fDilatedReactiveFactor; + FfxFloat32 fAccumulationMask; + + FfxBoolean bIsResetFrame; + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; +}; + struct LockState { FfxBoolean NewLock; //Set for both unique new and re-locked new FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) }; -FfxFloat32 GetNormalizedRemainingLockLifetime(FfxFloat32x3 fLockStatus) +void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) { - const FfxFloat32 fTrust = fLockStatus[LOCK_TRUST]; - - return ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - LockInitialLifetime()) / LockInitialLifetime() * fTrust; + fLockStatus = FfxFloat32x2(0, 0); } #if FFX_HALF -FFX_MIN16_F GetNormalizedRemainingLockLifetime(FFX_MIN16_F3 fLockStatus) +void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) { - const FFX_MIN16_F fTrust = fLockStatus[LOCK_TRUST]; - const FFX_MIN16_F fInitialLockLifetime = FFX_MIN16_F(LockInitialLifetime()); - - return ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - fInitialLockLifetime) / fInitialLockLifetime * fTrust; + fLockStatus = FFX_MIN16_F2(0, 0); } #endif -void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x3 fLockStatus) -{ - fLockStatus = FfxFloat32x3(0, 0, 1); // LOCK_TRUST to 1 -} -#if FFX_HALF -void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F3 fLockStatus) -{ - fLockStatus = FFX_MIN16_F3(0, 0, 1); // LOCK_TRUST to 1 -} -#endif - - -void KillLock(FFX_PARAMETER_INOUT FfxFloat32x3 fLockStatus) +void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) { fLockStatus[LOCK_LIFETIME_REMAINING] = 0; } #if FFX_HALF -void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F3 fLockStatus) +void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) { fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); } #endif -struct RectificationBoxData +struct RectificationBox { FfxFloat32x3 boxCenter; FfxFloat32x3 boxVec; FfxFloat32x3 aabbMin; FfxFloat32x3 
aabbMax; + FfxFloat32 fBoxCenterWeight; }; #if FFX_HALF -struct RectificationBoxDataMin16 +struct RectificationBoxMin16 { FFX_MIN16_F3 boxCenter; FFX_MIN16_F3 boxVec; FFX_MIN16_F3 aabbMin; FFX_MIN16_F3 aabbMax; -}; -#endif - -struct RectificationBox -{ - RectificationBoxData data_; - FfxFloat32 fBoxCenterWeight; -}; -#if FFX_HALF -struct RectificationBoxMin16 -{ - RectificationBoxDataMin16 data_; FFX_MIN16_F fBoxCenterWeight; }; #endif -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 initialColorSample) +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) { rectificationBox.fBoxCenterWeight = FfxFloat32(0); - rectificationBox.data_.boxCenter = FfxFloat32x3(0, 0, 0); - rectificationBox.data_.boxVec = FfxFloat32x3(0, 0, 0); - rectificationBox.data_.aabbMin = initialColorSample; - rectificationBox.data_.aabbMax = initialColorSample; + rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); + rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); + rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); + rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); } #if FFX_HALF -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 initialColorSample) +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) { rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0); - rectificationBox.data_.boxCenter = FFX_MIN16_F3(0, 0, 0); - rectificationBox.data_.boxVec = FFX_MIN16_F3(0, 0, 0); - rectificationBox.data_.aabbMin = initialColorSample; - rectificationBox.data_.aabbMax = initialColorSample; + rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0); + rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0); + rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); + rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); } 
#endif -void RectificationBoxAddSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) { - rectificationBox.data_.aabbMin = ffxMin(rectificationBox.data_.aabbMin, colorSample); - rectificationBox.data_.aabbMax = ffxMax(rectificationBox.data_.aabbMax, colorSample); + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; - rectificationBox.data_.boxCenter += weightedSample; - rectificationBox.data_.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } } #if FFX_HALF -void RectificationBoxAddSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) { - 
rectificationBox.data_.aabbMin = ffxMin(rectificationBox.data_.aabbMin, colorSample); - rectificationBox.data_.aabbMax = ffxMax(rectificationBox.data_.aabbMax, colorSample); + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.data_.boxCenter += weightedSample; - rectificationBox.data_.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } } #endif void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) { rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? 
rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); - rectificationBox.data_.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.data_.boxVec /= rectificationBox.fBoxCenterWeight; - FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.data_.boxVec - rectificationBox.data_.boxCenter * rectificationBox.data_.boxCenter)); - rectificationBox.data_.boxVec = stdDev; + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; } #if FFX_HALF void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) { rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f)); - rectificationBox.data_.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.data_.boxVec /= rectificationBox.fBoxCenterWeight; - FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.data_.boxVec - rectificationBox.data_.boxCenter * rectificationBox.data_.boxCenter)); - rectificationBox.data_.boxVec = stdDev; -} -#endif - -RectificationBoxData RectificationBoxGetData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) -{ - return rectificationBox.data_; -} -#if FFX_HALF -RectificationBoxDataMin16 RectificationBoxGetData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) -{ - return rectificationBox.data_; + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; } #endif @@ -231,8 +244,6 @@ FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) { FfxFloat32x3 fRgb; - fYCoCg.yz -= FfxFloat32x2(0.5f, 0.5f); 
// [0,1] -> [-0.5,0.5] - fRgb = FfxFloat32x3( fYCoCg.x + fYCoCg.y - fYCoCg.z, fYCoCg.x + fYCoCg.z, @@ -245,8 +256,6 @@ FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) { FFX_MIN16_F3 fRgb; - fYCoCg.yz -= FFX_MIN16_F2(0.5f, 0.5f); // [0,1] -> [-0.5,0.5] - fRgb = FFX_MIN16_F3( fYCoCg.x + fYCoCg.y - fYCoCg.z, fYCoCg.x + fYCoCg.z, @@ -265,8 +274,6 @@ FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) 0.5f * fRgb.r - 0.5f * fRgb.b, -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); - fYCoCg.yz += FfxFloat32x2(0.5f, 0.5f); // [-0.5,0.5] -> [0,1] - return fYCoCg; } #if FFX_HALF @@ -279,8 +286,6 @@ FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) 0.5 * fRgb.r - 0.5 * fRgb.b, -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); - fYCoCg.yz += FFX_MIN16_F2(0.5, 0.5); // [-0.5,0.5] -> [0,1] - return fYCoCg; } #endif @@ -303,7 +308,8 @@ FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) FfxFloat32 fPercievedLuminance = 0; if (fLuminance <= 216.0f / 24389.0f) { fPercievedLuminance = fLuminance * (24389.0f / 27.0f); - } else { + } + else { fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; } @@ -326,7 +332,6 @@ FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb) } #endif - FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) { return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; @@ -351,23 +356,46 @@ FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) { - return clamp(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? 
ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; + + // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); } #if FFX_HALF FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) { - return clamp(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; + return result; + + // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); } #endif +FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) +{ + const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; + const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); + const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); + + return fClampedUv; +} + FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) { - return all(FFX_GREATER_THAN_EQUAL(pos, FfxInt32x2(0, 0))) && all(FFX_LESS_THAN(pos, size)); + return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); } #if FFX_HALF FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) { - return all(FFX_GREATER_THAN_EQUAL(pos, FFX_MIN16_I2(0, 0))) && all(FFX_LESS_THAN(pos, size)); + return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size))); } #endif @@ -404,19 +432,134 @@ FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) { FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * 
DisplaySize(); - FfxFloat32x2 fHrPos = floor(fLrPosInHr) + 0.5f; - return FfxInt32x2(fHrPos); + FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); + return iPxHrPos; } #if FFX_HALF FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos) { FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter()); FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize()); - FFX_MIN16_F2 fHrPos = floor(fLrPosInHr) + FFX_MIN16_F(0.5); - return FFX_MIN16_I2(fHrPos); + FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr)); + return iPxHrPos; } #endif +FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) +{ + return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); +} + +FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + // fDeviceToViewDepth details found in ffx_fsr2.cpp + return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); +} + +FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) +{ + return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); + + const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); + const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; + const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; + + return FfxFloat32x3(X, Y, Z); +} + +FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32 GetMaxDistanceInMeters() +{ +#if 
FFX_FSR2_OPTION_INVERTED_DEPTH + return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); +#else + return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); +#endif +} + +FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + fRgb /= fPreExposure; + fRgb *= fExposure; + + fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX); + + return fRgb; +} + +FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) +{ + fRgb /= fExposure; + fRgb *= PreExposure(); + + return fRgb; +} + + +struct BilinearSamplingData +{ + FfxInt32x2 iOffsets[4]; + FfxFloat32 fWeights[4]; + FfxInt32x2 iBasePos; +}; + +BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) +{ + BilinearSamplingData data; + + FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); + data.iBasePos = FfxInt32x2(floor(fPxSample)); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + data.iOffsets[0] = FfxInt32x2(0, 0); + data.iOffsets[1] = FfxInt32x2(1, 0); + data.iOffsets[2] = FfxInt32x2(0, 1); + data.iOffsets[3] = FfxInt32x2(1, 1); + + data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); + data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); + + return data; +} + +struct PlaneData +{ + FfxFloat32x3 fNormal; + FfxFloat32 fDistanceFromOrigin; +}; + +PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) +{ + PlaneData plane; + + FfxFloat32x3 v0 = fP0 - fP1; + FfxFloat32x3 v1 = fP0 - fP2; + plane.fNormal = normalize(cross(v0, v1)); + plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); + + return plane; +} + +FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) +{ + return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); +} + #endif // #if defined(FFX_GPU) #endif //!defined(FFX_FSR2_COMMON_H) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h 
b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h index d5bbbcf..c63f182 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -29,12 +29,14 @@ FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) { - FfxFloat32x3 fRgb = LoadInputColor(FfxInt32x2(tex)); + FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); + fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); + FfxFloat32x3 fRgb = SampleInputColor(fUv); - FFX_STATIC const FfxFloat32x3 rgb2y = FfxFloat32x3(0.2126, 0.7152, 0.0722); + fRgb /= PreExposure(); //compute log luma - const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, dot(rgb2y, fRgb))); + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); // Make sure out of screen pixels contribute no value to the end result const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? 
fLogLuma : 0.0f; @@ -59,8 +61,7 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) { FfxFloat32 prev = SPD_LoadExposureBuffer().y; - FfxUInt32x2 renderSize = SPD_RenderSize(); - FfxFloat32 result = outValue.r / (renderSize.x * renderSize.y); + FfxFloat32 result = outValue.r; if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values { @@ -105,7 +106,7 @@ void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) } FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) { - return (v0 + v1 + v2 + v3); + return (v0 + v1 + v2 + v3) * 0.25f; } #endif diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl index 9a6a329..3c99b98 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -28,7 +28,7 @@ #define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 #define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 #define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 -#define FSR2_BIND_UAV_EXPOSURE 4 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 4 #define FSR2_BIND_CB_FSR2 5 #define FSR2_BIND_CB_SPD 6 @@ -63,68 +63,35 @@ { return cbSPD.renderSize; } -#else - uint MipCount() - { - return 0; - } - - uint NumWorkGroups() - { - return 0; - } - - uvec2 WorkGroupOffset() - { - return uvec2(0); - } - - uvec2 SPD_RenderSize() - { - return uvec2(0); - } #endif vec2 SPD_LoadExposureBuffer() { -#if defined(FSR2_BIND_UAV_EXPOSURE) - return imageLoad(rw_exposure, ivec2(0,0)).xy; -#else - return vec2(0); -#endif + return imageLoad(rw_auto_exposure, ivec2(0,0)).xy; } void SPD_SetExposureBuffer(vec2 value) { -#if defined(FSR2_BIND_UAV_EXPOSURE) - imageStore(rw_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f)); -#endif + imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f)); } vec4 SPD_LoadMipmap5(ivec2 iPxPos) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f); -#else - return vec4(0); -#endif } void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value) { switch (slice) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); break; -#endif -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) case 5: imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); break; -#endif default: + // avoid flattened side effect #if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f)); @@ -137,16 +104,12 @@ void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value) void 
SPD_IncreaseAtomicCounter(inout uint spdCounter) { -#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC) spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1); -#endif } void SPD_ResetAtomicCounter() { -#if defined(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC) imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0)); -#endif } #include "ffx_fsr2_compute_luminance_pyramid.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl index 07a097a..2b96636 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ #define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 0 #define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 #define FSR2_BIND_UAV_EXPOSURE_MIP_5 2 -#define FSR2_BIND_UAV_EXPOSURE 3 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 3 #define FSR2_BIND_CB_FSR2 0 #define FSR2_BIND_CB_SPD 1 @@ -33,94 +33,61 @@ #if defined(FSR2_BIND_CB_SPD) cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { - uint mips; - uint numWorkGroups; - uint2 workGroupOffset; - uint2 renderSize; + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; }; - uint MipCount() + FfxUInt32 MipCount() { return mips; } - uint NumWorkGroups() + FfxUInt32 NumWorkGroups() { return numWorkGroups; } - uint2 WorkGroupOffset() + FfxUInt32x2 WorkGroupOffset() { return workGroupOffset; } - uint2 SPD_RenderSize() + FfxUInt32x2 SPD_RenderSize() { return renderSize; } -#else - uint MipCount() - { - 
return 0; - } - - uint NumWorkGroups() - { - return 0; - } - - uint2 WorkGroupOffset() - { - return uint2(0, 0); - } - - uint2 SPD_RenderSize() - { - return uint2(0, 0); - } #endif -float2 SPD_LoadExposureBuffer() +FfxFloat32x2 SPD_LoadExposureBuffer() { -#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) - return rw_exposure[min16int2(0,0)]; -#else - return 0; -#endif + return rw_auto_exposure[FfxInt32x2(0,0)]; } -void SPD_SetExposureBuffer(float2 value) +void SPD_SetExposureBuffer(FfxFloat32x2 value) { -#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) - rw_exposure[min16int2(0,0)] = value; -#endif + rw_auto_exposure[FfxInt32x2(0,0)] = value; } -float4 SPD_LoadMipmap5(int2 iPxPos) +FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) - return float4(rw_img_mip_5[iPxPos], 0, 0, 0); -#else - return 0; -#endif + return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); } -void SPD_SetMipmap(int2 iPxPos, int slice, float value) +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxInt32 slice, FfxFloat32 value) { switch (slice) { -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: rw_img_mip_shading_change[iPxPos] = value; break; -#endif -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) case 5: rw_img_mip_5[iPxPos] = value; break; -#endif default: + // avoid flattened side effect #if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; @@ -131,14 +98,14 @@ void SPD_SetMipmap(int2 iPxPos, int slice, float value) } } -void SPD_IncreaseAtomicCounter(inout uint spdCounter) +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) { - InterlockedAdd(rw_spd_global_atomic[min16int2(0,0)], 1, spdCounter); + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0,0)], 1, spdCounter); } void SPD_ResetAtomicCounter() { - 
rw_spd_global_atomic[min16int2(0,0)] = 0; + rw_spd_global_atomic[FfxInt32x2(0,0)] = 0; } #include "ffx_fsr2_compute_luminance_pyramid.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h index 81db737..be41b38 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,75 +24,234 @@ FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; -FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace) +FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) { - FfxFloat32 fPrevNearestDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(fPreviousDepth)); + FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); - // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion" - // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4 - // TODO: check intention and improve, some banding visible - const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f; - FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + FfxFloat32 fDilatedSum = 0.0f; + FfxFloat32 fDepth = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = 
bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; + + if (IsOnScreen(iSamplePos, RenderSize())) { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); - // WARNING: Ksep only works with reversed-z with infinite projection. - const FfxFloat32 Ksep = 1.37e-05f; - FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; - FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; - FfxFloat32 fDepthClipFactor = (fDepthDiff > 0) ? ffxSaturate(fRequiredDepthSeparation / fDepthDiff) : 1.0f; + if (fDepthDiff > 0.0f) { -#ifdef _DEBUG - rw_debug_out[iPxSamplePos] = FfxFloat32x4(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace, fDepthDiff, fDepthClipFactor); +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); +#else + const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); #endif + + const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth); + const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); - return fPreviousDepthBilinearWeight * fDepthClipFactor * ffxLerp(1.0f, DepthClipBaseScale, ffxSaturate(fDepthDiff * fDepthDiff)); + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 Kfov = length(fCorner) / length(fCenter); + const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * 
fHalfViewportWidth * fDepthThreshold; + + const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); + const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); + fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; } -FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthViewSpace) +FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) { - FfxFloat32x2 fPxSample = fUvSample * RenderSize() - 0.5f; - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + FfxFloat32 minconvergence = 1.0f; - const FfxFloat32 fBilinearWeights[2][2] = { - { - (1 - fPxFrac.x) * (1 - fPxFrac.y), - (fPxFrac.x) * (1 - fPxFrac.y) - }, - { - (1 - fPxFrac.x) * (fPxFrac.y), - (fPxFrac.x) * (fPxFrac.y) - } - }; + FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); + FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); + FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); - FfxFloat32 fDepth = 0.0f; - FfxFloat32 fWeightSum = 0.0f; - for (FfxInt32 y = 0; y <= 1; ++y) { - for (FfxInt32 x = 0; x <= 1; ++x) { - FfxInt32x2 iSamplePos = iPxSample + FfxInt32x2(x, y); - if (IsOnScreen(iSamplePos, RenderSize())) { - FfxFloat32 fBilinearWeight = fBilinearWeights[y][x]; - if (fBilinearWeight > reconstructedDepthBilinearWeightThreshold) { - fDepth += ComputeSampleDepthClip(iSamplePos, LoadReconstructedPrevDepth(iSamplePos), fBilinearWeight, fCurrentDepthViewSpace); - fWeightSum += fBilinearWeight; - } + const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; + + if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { + for (FfxInt32 y = -1; y <= 1; ++y) { + for (FfxInt32 x = -1; x <= 1; ++x) { + + FfxInt32x2 sp = 
ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp); + FfxFloat32 fVelocityUv = length(fMotionVector); + + fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); } } } - return (fWeightSum > 0) ? fDepth / fWeightSum : DepthClipBaseScale; + return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); +} + +FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); + FfxFloat32 fDepthMax = 0.0f; + FfxFloat32 fDepthMin = fMaxDistInMeters; + + FfxInt32 iMaxDistFound = 0; + + for (FfxInt32 y = -1; y < 2; y++) { + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 iOffset = FfxInt32x2(x, y); + const FfxInt32x2 iSamplePos = iPxPos + iOffset; + + const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f; + FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; + + iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); + + fDepthMin = ffxMin(fDepthMin, fDepth); + fDepthMax = ffxMax(fDepthMax, fDepth); + } + } + + return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); +} + +FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); + + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); + FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); + + float fPxDistance = length(fMotionVector * DisplaySize()); + return fPxDistance > 1.0f ? 
ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; +} + +void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) +{ + // Compensate for bilinear sampling in accumulation pass + + FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz; + FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); + + float fMasksSum = 0.0f; + + FfxFloat32x3 fColorSamples[9]; + FfxFloat32 fReactiveSamples[9]; + FfxFloat32 fTransparencyAndCompositionSamples[9]; + + FFX_UNROLL + for (FfxInt32 y = -1; y < 2; y++) { + FFX_UNROLL + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + + FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz; + FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); + FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); + + fColorSamples[sampleIdx] = fColorSample; + fReactiveSamples[sampleIdx] = fReactiveSample; + fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample; + + fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample); + } + } + + if (fMasksSum > 0) + { + for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) + { + FfxFloat32x3 fColorSample = fColorSamples[sampleIdx]; + FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx]; + FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; + + const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); + const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; + + // Increase power for non-similar samples + const FfxFloat32 fPowerBiasMax = 6.0f; + const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); + const FfxFloat32 
fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); + const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); + + fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); + } + } + + StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); } +FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); + + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); + + return fPreparedYCoCg; +} + +float EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); + FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); + FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); + + return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); +} void DepthClip(FfxInt32x2 iPxPos) { FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; - FfxFloat32 fCurrentDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(LoadDilatedDepth(iPxPos))); - FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fCurrentDepthViewSpace); + // Discard tiny mvs + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); + + const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); + + // Compute prepared input color and depth 
clip + FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); + FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); + StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip)); + + // Compute dilated reactive mask +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxPos; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); +#endif + + FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); + FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos)); - StoreDepthClip(iPxPos, fDepthClip); + PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); } #endif //!defined( FFX_FSR2_DEPTH_CLIPH ) \ No newline at end of file diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl index 7233ec6..c7e3093 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,13 +19,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 3 -// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth -// UAV 12 : FSR2_DepthClip : rw_depth_clip -// CB 0 : cbFSR2 - #version 450 #extension GL_GOOGLE_include_directive : require @@ -34,8 +27,20 @@ #define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_SRV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DEPTH_CLIP 3 -#define FSR2_BIND_CB_FSR2 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7 +#define FSR2_BIND_SRV_INPUT_COLOR 8 +#define FSR2_BIND_SRV_INPUT_DEPTH 9 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 10 + +#define FSR2_BIND_UAV_DEPTH_CLIP 11 +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13 + +#define FSR2_BIND_CB_FSR2 14 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl index 8433734..3cf501c 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_depth_clip_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,17 +19,20 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 3 -// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_reconstructed_previous_nearest_depth -// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors -// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth -// UAV 12 : FSR2_DepthClip : rw_depth_clip -// CB 0 : cbFSR2 - #define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_SRV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DEPTH_CLIP 0 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_COLOR 7 +#define FSR2_BIND_SRV_INPUT_DEPTH 8 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 9 + +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 0 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 1 + #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h index b2266b7..8347fa8 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,17 +22,24 @@ #ifndef FFX_FSR2_LOCK_H #define FFX_FSR2_LOCK_H -FfxFloat32 GetLuma(FfxInt32x2 pos) +void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) { - //add some bias to avoid locking dark areas - return FfxFloat32(LoadPreparedInputColorLuma(pos)); + if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) + { +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxUInt32 farZ = 0x0; +#else + const FfxUInt32 farZ = 0x3f800000; +#endif + SetReconstructedDepth(iPxHrPos, farZ); + } } -FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) +FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) { const FfxInt32 RADIUS = 1; - FfxFloat32 fNucleus = GetLuma(pos); + FfxFloat32 fNucleus = LoadLockInputLuma(pos); FfxFloat32 similar_threshold = 1.05f; FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX; @@ -48,7 +55,8 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar - const FfxUInt32 rejectionMasks[4] = { + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left @@ -64,7 +72,7 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - FfxFloat32 sampleLuma = GetLuma(samplePos); + FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos); FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); if (difference > 0 && (difference < similar_threshold)) { @@ -80,47 +88,28 @@ FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 pos) if (FFX_FALSE == isRidge) { - return 0; + return false; } FFX_UNROLL for (FfxInt32 i = 0; i < 4; i++) 
{ - if ((mask & rejectionMasks[i]) == rejectionMasks[i]) { - return 0; + if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) { + return false; } } - return 1; + return true; } -FFX_STATIC FfxBoolean s_bLockUpdated = FFX_FALSE; - -FfxFloat32x3 ComputeLockStatus(FfxInt32x2 iPxLrPos, FfxFloat32x3 fLockStatus) +void ComputeLock(FfxInt32x2 iPxLrPos) { - FfxFloat32 fConfidenceOfThinFeature = ComputeThinFeatureConfidence(iPxLrPos); - - s_bLockUpdated = FFX_FALSE; - if (fConfidenceOfThinFeature > 0.0f) + if (ComputeThinFeatureConfidence(iPxLrPos)) { - //put to negative on new lock - fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] == FfxFloat32(0.0f)) ? FfxFloat32(-LockInitialLifetime()) : FfxFloat32(-(LockInitialLifetime() * 2)); - - s_bLockUpdated = FFX_TRUE; + StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); } - return fLockStatus; -} - -void ComputeLock(FfxInt32x2 iPxLrPos) -{ - FfxInt32x2 iPxHrPos = ComputeHrPosFromLrPos(iPxLrPos); - - FfxFloat32x3 fLockStatus = ComputeLockStatus(iPxLrPos, LoadLockStatus(iPxHrPos)); - - if ((s_bLockUpdated)) { - StoreLockStatus(iPxHrPos, fLockStatus); - } + ClearResourcesForNextFrame(iPxLrPos); } #endif // FFX_FSR2_LOCK_H diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl index 9c37774..f7cad59 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,23 +19,14 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 4 -// SRV 5 : m_UpscaleReactive : r_reactive_mask -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// UAV 27 : FSR2_ReactiveMaskMax : rw_reactive_max -// CB 0 : cbFSR2 -// CB 1 : FSR2DispatchOffsets - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_LOCK_STATUS 0 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 1 -#define FSR2_BIND_UAV_LOCK_STATUS 2 +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 +#define FSR2_BIND_UAV_NEW_LOCKS 1 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 #define FSR2_BIND_CB_FSR2 3 #include "ffx_fsr2_callbacks_glsl.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl index 492965c..1409dce 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_lock_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,16 +19,9 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 4 -// SRV 5 : m_UpscaleReactive : r_reactive_mask -// SRV 11 : FSR2_LockStatus2 : r_lock_status -// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color -// UAV 11 : FSR2_LockStatus1 : rw_lock_status -// CB 0 : cbFSR2 - -#define FSR2_BIND_SRV_LOCK_STATUS 1 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 2 -#define FSR2_BIND_UAV_LOCK_STATUS 0 +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 +#define FSR2_BIND_UAV_NEW_LOCKS 0 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 1 #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h index 959031b..cee9e14 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_postprocess_lock_status.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -37,62 +37,70 @@ FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) #if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) #else -DeclareCustomFetchBilinearSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) #endif -DeclareCustomTextureSample(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) +DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) -FfxFloat32 GetShadingChangeLuma(FfxFloat32x2 fUvCoord) +FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) { - // const FfxFloat32 fShadingChangeLuma = exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()) * LumaMipRcp()); - const FfxFloat32 fShadingChangeLuma = FfxFloat32(exp(SampleMipLuma(fUvCoord, LumaMipLevelToUse()) * FfxFloat32(LumaMipRcp()))); - return fShadingChangeLuma; -} - -LockState GetLockState(FfxFloat32x3 fLockStatus) -{ - LockState state = { FFX_FALSE, FFX_FALSE }; + FfxFloat32 fShadingChangeLuma = 0; - //Check if this is a new or refreshed lock - state.NewLock = fLockStatus[LOCK_LIFETIME_REMAINING] < FfxFloat32(0.0f); +#if 0 + fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x); +#else - //For a non-refreshed lock, the lifetime is set to LockInitialLifetime() - state.WasLockedPrevFrame = fLockStatus[LOCK_TRUST] != FfxFloat32(0.0f); + const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse()); + FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); - return state; -} + fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); + fShadingChangeLuma = Exposure() * 
exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); +#endif -LockState PostProcessLockStatus(FfxInt32x2 iPxHrPos, FFX_PARAMETER_IN FfxFloat32x2 fLrUvJittered, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, const FfxFloat32 fAccumulationMask, FFX_PARAMETER_IN FfxFloat32 fHrVelocity, - FFX_PARAMETER_INOUT FfxFloat32 fAccumulationTotalWeight, FFX_PARAMETER_INOUT FfxFloat32x3 fLockStatus, FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { + fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); - const LockState state = GetLockState(fLockStatus); + return fShadingChangeLuma; +} - fLockStatus[LOCK_LIFETIME_REMAINING] = abs(fLockStatus[LOCK_LIFETIME_REMAINING]); +void UpdateLockStatus(AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, + FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, + FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, + FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { - FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(fLrUvJittered); + const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? 
fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; - fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), FfxFloat32(0.5f)); - fLuminanceDiff = FfxFloat32(1) - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - if (fLuminanceDiff > FfxFloat32(0.2f)) { - KillLock(fLockStatus); - } + fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - if (!state.NewLock && fLockStatus[LOCK_LIFETIME_REMAINING] >= FfxFloat32(0)) - { - fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fAccumulationMask); + if (state.NewLock) { + fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; - const FfxFloat32 depthClipThreshold = FfxFloat32(0.99f); - if (fDepthClipFactor < depthClipThreshold) - { + fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; + } + else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { + fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); + } + else { + if (fLuminanceDiff > 0.1f) { KillLock(fLockStatus); } } - return state; + fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); + fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); + + fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); + fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); + + // Compute this frame lock contribution + const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); + const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); + + fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); } #endif //!defined( 
FFX_FSR2_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h index 0429d8f..d9006cd 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -28,67 +28,29 @@ void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); } -#if FFX_HALF - #define FSR_RCAS_H - FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p) - { - FfxFloat32x4 inputSample = LoadRCAS_Input(p); //TODO: fix type - - inputSample.rgb *= Exposure(); - -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - inputSample.rgb = Tonemap(inputSample.rgb); -#endif // #if FFX_FSR2_OPTION_HDR_COLOR_INPUT - - return FfxFloat16x4(inputSample); - } - void FsrRcasInputH(inout FfxFloat16 r, inout FfxFloat16 g, inout FfxFloat16 b) {} -#else - #define FSR_RCAS_F - FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) - { - FfxFloat32x4 inputSample = LoadRCAS_Input(p); - - inputSample.rgb *= Exposure(); +#define FSR_RCAS_F +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) +{ + FfxFloat32x4 fColor = LoadRCAS_Input(p); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - inputSample.rgb = Tonemap(inputSample.rgb); -#endif + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); - return inputSample; - } + return fColor; +} - void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} -#endif // #if FFX_HALF +void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} #include "ffx_fsr1.h" void CurrFilter(FFX_MIN16_U2 pos) { -#if FFX_HALF - FfxFloat16x3 
c; - FsrRcasH(c.r, c.g, c.b, pos, RCASConfig()); - -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - c = InverseTonemap(c); -#endif - - c /= FfxFloat16(Exposure()); - - WriteUpscaledOutput(pos, c); //TODO: fix type -#else FfxFloat32x3 c; FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - c = InverseTonemap(c); -#endif - - c /= Exposure(); + c = UnprepareRgb(c, Exposure()); WriteUpscaledOutput(pos, c); -#endif } void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl index 1097faf..20807a3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,19 +19,14 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 6 -// SRV 4 : m_Exposure : r_exposure -// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : cbRCAS - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_RCAS_INPUT 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 #define FSR2_BIND_CB_FSR2 3 @@ -58,17 +53,10 @@ } #endif -#if FFX_HALF -vec4 LoadRCAS_Input(FfxInt16x2 iPxPos) -{ - return texelFetch(r_rcas_input, iPxPos, 0); -} -#else vec4 LoadRCAS_Input(FfxInt32x2 iPxPos) { return texelFetch(r_rcas_input, iPxPos, 0); } -#endif #include "ffx_fsr2_rcas.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl index ea6b35a..f447b7e 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_rcas_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,14 +19,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 6 -// SRV 4 : m_Exposure : r_exposure -// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input -// UAV 18 : DisplayOutput : rw_upscaled_output -// CB 0 : cbFSR2 -// CB 1 : cbRCAS - -#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_RCAS_INPUT 1 #define FSR2_BIND_UAV_UPSCALED_OUTPUT 0 #define FSR2_BIND_CB_FSR2 0 @@ -53,17 +46,11 @@ } #endif -#if FFX_HALF -float4 LoadRCAS_Input(FfxInt16x2 iPxPos) -{ - return r_rcas_input[iPxPos]; -} -#else + float4 LoadRCAS_Input(FfxInt32x2 iPxPos) { return r_rcas_input[iPxPos]; } -#endif #include "ffx_fsr2_rcas.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h index aad1992..e9ccc4b 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,36 +24,25 @@ void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) { - FfxFloat32x2 fDepthUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; - FfxFloat32x2 fPxPrevPos = (fDepthUv + fMotionVector) * iPxDepthSize - FfxFloat32x2(0.5, 0.5); - FfxInt32x2 iPxPrevPos = FfxInt32x2(floor(fPxPrevPos)); - FfxFloat32x2 fPxFrac = ffxFract(fPxPrevPos); - - const FfxFloat32 bilinearWeights[2][2] = { - { - (1 - fPxFrac.x) * (1 - fPxFrac.y), - (fPxFrac.x) * (1 - fPxFrac.y) - }, - { - (1 - fPxFrac.x) * (fPxFrac.y), - (fPxFrac.x) * (fPxFrac.y) - } - }; + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); + + FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); // Project current depth into previous frame locations. // Push to all pixels having some contribution if reprojection is using bilinear logic. 
- for (FfxInt32 y = 0; y <= 1; ++y) { - for (FfxInt32 x = 0; x <= 1; ++x) { - - FfxInt32x2 offset = FfxInt32x2(x, y); - FfxFloat32 w = bilinearWeights[y][x]; - - if (w > reconstructedDepthBilinearWeightThreshold) { - - FfxInt32x2 storePos = iPxPrevPos + offset; - if (IsOnScreen(storePos, iPxDepthSize)) { - StoreReconstructedDepth(storePos, fDepth); - } + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, iPxDepthSize)) { + StoreReconstructedDepth(iStorePos, fDepth); } } } @@ -106,65 +95,24 @@ void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxIn } } -FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) -{ - FfxFloat32 minconvergence = 1.0f; - - FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos) * RenderSize(); - FfxFloat32 fNucleusVelocity = length(fMotionVectorNucleus); - - const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; - - if (fNucleusVelocity > MotionVectorVelocityEpsilon) { - for (FfxInt32 y = -1; y <= 1; ++y) { - for (FfxInt32 x = -1; x <= 1; ++x) { - - FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); - - FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp) * RenderSize(); - FfxFloat32 fVelocity = length(fMotionVector); - - fVelocity = ffxMax(fVelocity, fNucleusVelocity); - minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocity, fMotionVectorNucleus / fVelocity)); - } - } - } - - return ffxSaturate(1.0f - minconvergence); -} - - -void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) +FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) { - // Compensate for bilinear sampling in accumulation pass - - 
FfxFloat32x3 fReferenceColor = LoadPreparedInputColor(iPxLrPos); - FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); - - for (int y = -1; y < 2; y++) { - for (int x = -1; x < 2; x++) { - - const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxFloat32x3 fColorSample = LoadPreparedInputColor(sampleCoord); - FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); - FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - const FfxFloat32 fColorSimilarity = dot(normalize(fReferenceColor), normalize(fColorSample)); - const FfxFloat32 fVelocitySimilarity = 1.0f - abs(length(fReferenceColor) - length(fColorSample)); - const FfxFloat32 fSimilarity = fColorSimilarity * fVelocitySimilarity; + // Use internal auto exposure for locking logic + fRgb /= PreExposure(); + fRgb *= Exposure(); - // Increase power for non-similar samples - const FfxFloat32 fPowerBiasMax = 6.0f; - const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); - const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); - const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fRgb = Tonemap(fRgb); +#endif - fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); - } - } + //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! 
+ const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); - StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); + return fLockInputLuma; } void ReconstructAndDilate(FfxInt32x2 iPxLrPos) @@ -189,13 +137,8 @@ void ReconstructAndDilate(FfxInt32x2 iPxLrPos) ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); -#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); -#else - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, DisplaySize()); -#endif - - PreProcessReactiveMasks(iPxLrPos, fMotionDivergence); + FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); + StoreLockInputLuma(iPxLrPos, fLockInputLuma); } diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl index 96d1383..20e17ee 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,29 +19,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-// FSR2 pass 2 -// SRV 2 : m_MotionVector : r_motion_vectors -// SRV 3 : m_depthbuffer : r_depth -// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth -// UAV 8 : FSR2_DilatedVelocity : rw_dilated_motion_vectors -// UAV 9 : FSR2_DilatedDepth : rw_dilatedDepth -// CB 0 : cbFSR2 - #version 450 #extension GL_GOOGLE_include_directive : require #extension GL_EXT_samplerless_texture_functions : require -#define FSR2_BIND_SRV_MOTION_VECTORS 0 -#define FSR2_BIND_SRV_DEPTH 1 -#define FSR2_BIND_SRV_REACTIVE_MASK 2 -#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 +#define FSR2_BIND_SRV_LUMA_HISTORY 4 + #define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 #define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6 #define FSR2_BIND_UAV_DILATED_DEPTH 7 -#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 8 -#define FSR2_BIND_CB_FSR2 9 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8 +#define FSR2_BIND_UAV_LUMA_HISTORY 9 +#define FSR2_BIND_UAV_LUMA_INSTABILITY 10 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11 + +#define FSR2_BIND_CB_FSR2 12 #include "ffx_fsr2_callbacks_glsl.h" #include "ffx_fsr2_common.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl index 57f3f49..33c044e 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -19,23 +19,16 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -// FSR2 pass 2 -// SRV 2 : m_MotionVector : r_motion_vectors -// SRV 3 : m_depthbuffer : r_depth -// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_reconstructed_previous_nearest_depth -// UAV 8 : FSR2_DilatedVelocity : rw_dilated_motion_vectors -// UAV 9 : FSR2_DilatedDepth : rw_dilatedDepth -// CB 0 : cbFSR2 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 -#define FSR2_BIND_SRV_MOTION_VECTORS 0 -#define FSR2_BIND_SRV_DEPTH 1 -#define FSR2_BIND_SRV_REACTIVE_MASK 2 -#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 3 -#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 4 #define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 #define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 1 #define FSR2_BIND_UAV_DILATED_DEPTH 2 -#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 3 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 3 + #define FSR2_BIND_CB_FSR2 0 #include "ffx_fsr2_callbacks_hlsl.h" diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h index 5ae962d..f7f3961 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_reproject.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -23,7 +23,7 @@ #define FFX_FSR2_REPROJECT_H #ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE -#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 // Approximate +#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference #endif FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) @@ -49,13 +49,16 @@ DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) { - return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f); + FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f); + return fSample; } #if FFX_HALF FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample) { - return FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0f); + FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0); + + return fSample; } #endif @@ -88,38 +91,46 @@ FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) return fDilatedMotionVector; } -void ComputeReprojectedUVs(FfxInt32x2 iPxHrPos, FfxFloat32x2 fMotionVector, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +FfxBoolean IsUvInside(FfxFloat32x2 fUv) +{ + return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); +} + +void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) { - FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - fReprojectedHrUv = fHrUv + fMotionVector; + fReprojectedHrUv = params.fHrUv + params.fMotionVector; - bIsExistingSample = (fReprojectedHrUv.x >= 0.0f && fReprojectedHrUv.x <= 1.0f) && - (fReprojectedHrUv.y >= 0.0f && fReprojectedHrUv.y <= 1.0f); + bIsExistingSample = IsUvInside(fReprojectedHrUv); } -void ReprojectHistoryColor(FfxInt32x2 iPxHrPos, 
FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x4 fHistoryColorAndWeight) +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame) { - fHistoryColorAndWeight = HistorySample(fReprojectedHrUv, DisplaySize()); - fHistoryColorAndWeight.rgb *= Exposure(); + FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize()); -#if FFX_FSR2_OPTION_HDR_COLOR_INPUT - fHistoryColorAndWeight.rgb = Tonemap(fHistoryColorAndWeight.rgb); -#endif + fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); + + fHistoryColor = RGBToYCoCg(fHistoryColor); - fHistoryColorAndWeight.rgb = RGBToYCoCg(fHistoryColorAndWeight.rgb); + //Compute temporal reactivity info + fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); + bInMotionLastFrame = (fHistory.w < 0.0f); } -void ReprojectHistoryLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x3 fReprojectedLockStatus) +LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) { - // If function is called from Accumulate pass, we need to treat locks differently - FfxFloat32 fInPlaceLockLifetime = LoadRwLockStatus(iPxHrPos)[LOCK_LIFETIME_REMAINING]; + LockState state = { FFX_FALSE, FFX_FALSE }; + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); + state.NewLock = fNewLockIntensity > (127.0f / 255.0f); - fReprojectedLockStatus = SampleLockStatus(fReprojectedHrUv); + FfxFloat32 fInPlaceLockLifetime = state.NewLock ? 
fNewLockIntensity : 0; - // Keep lifetime if new lock - if (fInPlaceLockLifetime < 0.0f) { - fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] = fInPlaceLockLifetime; + fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv); + + if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) { + state.WasLockedPrevFrame = true; } + + return state; } #endif //!defined( FFX_FSR2_REPROJECT_H ) diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h index 89734f6..535dbc3 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_resources.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,59 +24,76 @@ #if defined(FFX_CPU) || defined(FFX_GPU) #define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 1 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 2 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 3 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 4 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 5 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 6 -#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 7 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 8 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 9 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 10 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 11 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP 12 -#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 13 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 
-#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 -#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 -#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 -#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 19 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 20 -#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 21 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 -#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 26 -#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 27 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 28 // same as FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 28 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_1 29 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_2 30 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_3 31 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 32 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5 33 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_6 34 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_7 35 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_8 36 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_9 37 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_10 38 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_11 39 -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12 40 -#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 41 -#define FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE 42 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define 
FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12 +#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 +#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 +#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 +#define 
FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION 45 -// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12] -#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 -#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE) +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 53 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 54 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 55 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 
56 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 57 -#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 43 +// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 -#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 58 + +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 #define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 #define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h index cfa9db8..f94f40a 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_sample.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -137,16 +137,19 @@ FfxFloat32 Lanczos2(FfxFloat32 x) } #if FFX_HALF + +#if 0 FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) { const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? 
FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); } +#endif FFX_MIN16_F Lanczos2(FFX_MIN16_F x) { x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); - return Lanczos2NoClamp(x); + return FFX_MIN16_F(Lanczos2NoClamp(x)); } #endif //FFX_HALF @@ -566,12 +569,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 #define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ @@ -580,12 +583,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 #define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ - FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f); \ - FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ /* Clamp base coords */ \ - iPxSample.x = ffxMax(0, ffxMin(iPxSample.x, iTextureSize.x - 1)); \ - iPxSample.y = ffxMax(0, 
ffxMin(iPxSample.y, iTextureSize.y - 1)); \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h new file mode 100644 index 0000000..101b75d --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h @@ -0,0 +1,250 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define USE_YCOCG 1 + +#define fAutogenEpsilon 0.01f + +// EXPERIMENTAL + +FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 X = colorPreAlpha; + FfxFloat32x3 Y = colorPostAlpha; + FfxFloat32x3 Z = colorPrevPreAlpha; + FfxFloat32x3 W = colorPrevPostAlpha; + + FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); + + // cleanup very small values + retVal = (retVal < getTcThreshold()) ? 
FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); + + return retVal; +} + +// works ok: thin edges +FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha = + FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + + FfxFloat32x3 X = colorPrevPreAlpha; + FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha; + FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; + FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; + + FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); + + FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) ); + + // only pixels that have significantly changed in color should be considered + retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) ); + + return retVal; +} + +// This function computes the TransparencyAndComposition mask: +// This mask indicates pixels that should discard locks and apply color clamping. 
+// +// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of +// the MVs can not be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization) +// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting. +// +// This function takes an opaque-only and a final texture and uses internal copies of those textures from the last frame. +// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency. +// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels. +// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation. +// +// In the final step it stores the current textures in internal textures for the next frame + +FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx); + + // [branch] + if (retVal > FFX_MIN16_F(0.01f)) + { + retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx); + } + return retVal; +} + +float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb; + FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb; + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return 
sqrt(sqrt(gradX * gradY)); +} + +float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb); + FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb); + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f); + FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + W = RGBToYCoCg(W); +#endif + minPrev = min(minPrev, W); + maxPrev = max(maxPrev, W); + } + } + // instead of computing the overlap: simply count how many 
samples are outside + // set reactive based on that + FFX_MIN16_F count = FFX_MIN16_F(0.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + Y = RGBToYCoCg(Y); +#endif + count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + } + } + retVal = count / FFX_MIN16_F(27.f); + + return retVal; +} + + +// This function computes the Reactive mask: +// We want pixels marked where the alpha portion of the frame changes a lot between neighbours +// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...) +// As a result history would not be trustworthy. +// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation +// For mirrors we may assume the pre-alpha is pretty uniform color. +// +// This works well generally, but also marks edge pixels +FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + // we only get here if alpha has a significant contribution and has changed since last frame. 
+ FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + // mark pixels with huge variance in alpha as reactive + FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx)); + FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx)); + retVal = ffxSaturate(alphaEdge - opaqueEdge); + + // the above also marks edge pixels due to jitter, so we need to cancel those out + + + return retVal; +} diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h.meta b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h.meta new file mode 100644 index 0000000..9394b7f --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 6f2847e89a16de145b7f3d8d57e402b2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl new file mode 100644 index 0000000..bebca91 --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl @@ -0,0 +1,116 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 5 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6 + +#define FSR2_BIND_UAV_AUTOREACTIVE 7 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 8 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10 + +#define FSR2_BIND_CB_FSR2 11 +#define FSR2_BIND_CB_REACTIVE 12 + +#include "ffx_fsr2_callbacks_glsl.h" +#include "ffx_fsr2_common.h" + +#ifdef FSR2_BIND_CB_REACTIVE +layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t +{ + float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + float fTcScale; + float fReactiveScale; + float fReactiveMax; +} cbGenerateReactive; + +float getTcThreshold() +{ + return cbGenerateReactive.fTcThreshold; +} + +#else + float getTcThreshold() + { + return 0.05f; + } +#endif + +#include "ffx_fsr2_tcr_autogen.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +void main() +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy); + + // ToDo: take into account jitter 
(i.e. add delta of previous jitter and current jitter to previous UV) + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f ); + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale); + outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax ); + } + + outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale); + + outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId))); + outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId))); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl.meta b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl.meta new file mode 100644 index 0000000..fb3bba0 --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.glsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 9fa6f2f6f5a17084da2790fb96ee4c03 +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl new file mode 100644 index 0000000..8e635d1 --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl @@ -0,0 +1,114 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 4 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 5 + +#define FSR2_BIND_UAV_AUTOREACTIVE 0 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 1 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 2 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 3 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_AUTOREACTIVE 1 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" + +#if defined(FSR2_BIND_CB_AUTOREACTIVE) + cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_AUTOREACTIVE) + { + float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + float fTcScale; + float fReactiveScale; + float fReactiveMax; + }; + float getTcThreshold() + { + return fTcThreshold; + } +#else + #define fTcThreshold 0.05f + #define fTcScale 1.00f + #define fReactiveScale 10.0f + #define fReactiveMax 0.90f + float getTcThreshold() + { + return fTcThreshold; + } +#endif + +#include "ffx_fsr2_tcr_autogen.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, 
FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId); + + // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = 0; + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(fReactiveScale); + outReactiveMask.x = outReactiveMask.x < fReactiveMax ? 
outReactiveMask.x : FFX_MIN16_F( fReactiveMax ); + } + + outReactiveMask.y *= FFX_MIN16_F(fTcScale ); + + outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) ); + outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) ); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta new file mode 100644 index 0000000..733cab7 --- /dev/null +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 28d10665f0175ed4bb9571389f5bb258 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h b/Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h index 80524d4..abdb888 100644 --- a/Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h +++ b/Assets/Resources/FSR2/shaders/ffx_fsr2_upsample.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -22,28 +22,26 @@ #ifndef FFX_FSR2_UPSAMPLE_H #define FFX_FSR2_UPSAMPLE_H -#define FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT 0 - FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; -void Deringing(RectificationBoxData clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) { fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); } #if FFX_HALF -void Deringing(RectificationBoxDataMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) +void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) { fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); } #endif #ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE -#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 1 // Approximate +#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate #endif -FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 fKernelWeight) +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) { - FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight; + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); #elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT @@ -57,15 +55,16 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32x2 } #if FFX_HALF -FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F2 fKernelWeight) +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) { - FFX_MIN16_F2 fSrcSampleOffsetBiased = 
fSrcSampleOffset * fKernelWeight; + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; #if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_APPROXIMATE - FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_LUT +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + // To Test: Save reciproqual sqrt compute // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); #else @@ -75,44 +74,33 @@ FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F2 } #endif -FfxFloat32 Pow3(FfxFloat32 x) -{ - return x * x * x; -} +FfxFloat32 ComputeMaxKernelWeight() { + const FfxFloat32 fKernelSizeBias = 1.0f; -#if FX_HALF -FFX_MIN16_F Pow3(FFX_MIN16_F x) -{ - return x * x * x; + FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); + + return ffxMin(FfxFloat32(1.99f), fKernelWeight); } -#endif -FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fKernelWeight, FFX_PARAMETER_INOUT RectificationBoxData clippingBox) +FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) { -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_begin.h" -#endif + #if 
FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_begin.h" + #endif // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) - FfxFloat32x2 fDstOutputPos = FfxFloat32x2(iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_end.h" -#endif - -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_begin.h" - RectificationBoxMin16 fRectificationBox; -#else - RectificationBox fRectificationBox; -#endif + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_end.h" + #endif FfxFloat32x3 fSamples[iLanczos2SampleCount]; - FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 - + FfxInt32x2 offsetTL; offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? 
FfxInt32(-2) : FfxInt32(-1); @@ -127,30 +115,37 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fK FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); FFX_UNROLL - for (FfxInt32 row = 0; row < 4; row++) { + for (FfxInt32 row = 0; row < 3; row++) { FFX_UNROLL - for (FfxInt32 col = 0; col < 4; col++) { - FfxInt32 iSampleIndex = col + (row << 2); + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); - FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; - const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); - } + fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); + } } - RectificationBoxReset(fRectificationBox, fSamples[0]); + FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); - FfxFloat32x3 fColor = FfxFloat32x3(0.f, 0.f, 0.f); - FfxFloat32 fWeight = FfxFloat32(0.f); FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); + + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); + const FfxFloat32 fKernelBias = 
ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); + + const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); + FFX_UNROLL for (FfxInt32 row = 0; row < 3; row++) { - FFX_UNROLL for (FfxInt32 col = 0; col < 3; col++) { FfxInt32 iSampleIndex = col + (row << 2); @@ -161,54 +156,39 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(FfxInt32x2 iPxHrPos, FfxFloat32x2 fK FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; - FfxFloat32 fSampleWeight = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))) * GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelWeight); + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); // Update rectification box - const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); - FfxFloat32 fBoxSampleWeight = FfxFloat32(1) - ffxSaturate(fSrcSampleOffsetSq / FfxFloat32(3)); - fBoxSampleWeight *= fBoxSampleWeight; - RectificationBoxAddSample(fRectificationBox, fSamples[iSampleIndex], fBoxSampleWeight); + { + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); - fWeight += fSampleWeight; - fColor += fSampleWeight * fSamples[iSampleIndex]; + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } } } - // Normalize for deringing (we need to compare colors) - fColor = fColor / (abs(fWeight) > FSR2_EPSILON ? 
fWeight : FfxFloat32(1.f)); - - RectificationBoxComputeVarianceBoxData(fRectificationBox); -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - RectificationBoxDataMin16 rectificationData = RectificationBoxGetData(fRectificationBox); - clippingBox.aabbMax = rectificationData.aabbMax; - clippingBox.aabbMin = rectificationData.aabbMin; - clippingBox.boxCenter = rectificationData.boxCenter; - clippingBox.boxVec = rectificationData.boxVec; -#else - RectificationBoxData rectificationData = RectificationBoxGetData(fRectificationBox); - clippingBox = rectificationData; -#endif - Deringing(rectificationData, fColor); + RectificationBoxComputeVarianceBoxData(clippingBox); -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - clippingBox.aabbMax = rectificationData.aabbMax; - clippingBox.aabbMin = rectificationData.aabbMin; - clippingBox.boxCenter = rectificationData.boxCenter; - clippingBox.boxVec = rectificationData.boxVec; -#endif + fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); + + if (fColorAndWeight.w > FSR2_EPSILON) { + // Normalize for deringing (we need to compare colors) + fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; + fColorAndWeight.w *= fUpsampleLanczosWeightScale; - if (any(FFX_LESS_THAN(fKernelWeight, FfxFloat32x2(1, 1)))) { - fWeight = FfxFloat32(averageLanczosWeightPerFrame); + Deringing(clippingBox, fColorAndWeight.xyz); } -#if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF -#include "ffx_fsr2_force16_end.h" -#endif -#if FFX_FSR2_OPTION_GUARANTEE_POSITIVE_UPSAMPLE_WEIGHT - return FfxFloat32x4(fColor, ffxMax(FfxFloat32(FSR2_EPSILON), fWeight)); -#else - return FfxFloat32x4(fColor, ffxMax(FfxFloat32(0), fWeight)); -#endif + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_end.h" + #endif + + return fColorAndWeight; } #endif //!defined( FFX_FSR2_UPSAMPLE_H ) diff --git a/Assets/Resources/FSR2/shaders/ffx_spd.h 
b/Assets/Resources/FSR2/shaders/ffx_spd.h index 5a27a84..5ce24ec 100644 --- a/Assets/Resources/FSR2/shaders/ffx_spd.h +++ b/Assets/Resources/FSR2/shaders/ffx_spd.h @@ -1,6 +1,6 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal