diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl index d2f1b32..c000624 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,36 +20,31 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 -#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 #if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 #else -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 #endif -#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 -#define FSR3UPSCALER_BIND_SRV_LOCK_STATUS 4 -#define FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR 5 -#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT 6 -#define FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 -#define FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS 8 -#define FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE 9 -#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY 10 +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 -#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED 0 -#define FSR3UPSCALER_BIND_UAV_LOCK_STATUS 1 -#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 2 -#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 3 -#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY 4 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 +#define FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 8 -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED 0 +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 1 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 2 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" #include "fsr3upscaler/ffx_fsr3upscaler_sample.h" #include "fsr3upscaler/ffx_fsr3upscaler_upsample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h" #include "fsr3upscaler/ffx_fsr3upscaler_reproject.h" 
#include "fsr3upscaler/ffx_fsr3upscaler_accumulate.h" @@ -68,12 +64,7 @@ FFX_PREFER_WAVE64 FFX_FSR3UPSCALER_NUM_THREADS FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) { - const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT - 1) / FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT; - uGroupId.y = GroupRows - uGroupId.y - 1; - - uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; - - Accumulate(uDispatchThreadId); + Accumulate(iDispatchThreadId); } diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl index 0d6e2eb..c052a1d 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,13 +20,13 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- #define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 #define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 #define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 0 +#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION 1 -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #define FSR3UPSCALER_BIND_CB_REACTIVE 1 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl similarity index 70% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl index 26b28f0..6ac5067 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,18 +20,19 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 4 -#define FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA 0 +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 -#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 0 -#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 1 - -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_lock.h" +#include "fsr3upscaler/ffx_fsr3upscaler_debug_view.h" #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH #define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 @@ -48,9 +50,7 @@ FFX_PREFER_WAVE64 FFX_FSR3UPSCALER_NUM_THREADS FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +void CS(FfxInt32x2 iPxPos : SV_DispatchThreadID) { - uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; - - ComputeLock(uDispatchThreadId); + DebugView(iPxPos); } diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta similarity index 75% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta rename to 
Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta index 45c99dc..5376385 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 98d2cbbda5e90dd4ebd1d70abbb63a09 +guid: 6e10b90cb8c2cd74dabb4a577faa7c67 ShaderIncludeImporter: externalObjects: {} userData: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl new file mode 100644 index 0000000..44d97b3 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl @@ -0,0 +1,59 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_FRAME_INFO 3 +#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 + +#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY 0 +#define FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY 1 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_luma_instability.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + LumaInstability(iDispatchThreadId); +} diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta similarity index 75% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta index 508b43e..92ba6f1 100644 --- 
a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 2d149b52ba0f5bb468a94a71dbbcb66f +guid: 78e3794a851a9c4409a622bc569b195d ShaderIncludeImporter: externalObjects: {} userData: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl similarity index 70% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl index 93b7332..37fa1b7 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,20 +20,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 0 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH 1 #define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 0 -#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 -#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 2 -#define FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE 3 +#define FSR3UPSCALER_BIND_UAV_FRAME_INFO 1 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 2 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 3 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 6 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 7 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1 8 + #define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #define FSR3UPSCALER_BIND_CB_SPD 1 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h" +#include "fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h" #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH #define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta similarity index 75% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta index 6489d6d..3ca1b25 100644 --- 
a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: bafb3726a76b97a49bb343d8a4323754 +guid: 781aaeb95f903984a905ef657085a673 ShaderIncludeImporter: externalObjects: {} userData: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl similarity index 74% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl index f277fd1..d40b7d2 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,23 +20,21 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 1 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 2 -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 0 -#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 1 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 2 -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 3 - -#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 -#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS 1 -#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH 2 -#define FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA 3 +#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS 0 +#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH 1 +#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH 3 +#define FSR3UPSCALER_BIND_UAV_CURRENT_LUMA 4 -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h" +#include "fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h" #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH #define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 @@ -53,12 +52,7 @@ FFX_PREFER_WAVE64 FFX_FSR3UPSCALER_NUM_THREADS FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS( - int2 iGroupId : SV_GroupID, - int2 iDispatchThreadId : SV_DispatchThreadID, - int2 iGroupThreadId : SV_GroupThreadID, - int iGroupIndex : SV_GroupIndex -) +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) { - 
ReconstructAndDilate(iDispatchThreadId); + PrepareInputs(iDispatchThreadId); } diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta similarity index 75% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta index cde3a5e..a561586 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: da435b71cf57e2247b80ae0f0f86d1f8 +guid: 71725b1aad250484a83d8124d0d8bef1 ShaderIncludeImporter: externalObjects: {} userData: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl similarity index 71% rename from Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl rename to Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl index 70cc7ba..b09e9b8 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,27 +20,25 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
+#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 3 +#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR3UPSCALER_BIND_SRV_ACCUMULATION 5 +#define FSR3UPSCALER_BIND_SRV_SHADING_CHANGE 6 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 8 -#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 -#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 -#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 -#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 3 -#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 -#define FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 -#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 6 -#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 7 -#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 8 -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 9 - -#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS 0 -#define FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR 1 +#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS 0 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 1 +#define FSR3UPSCALER_BIND_UAV_ACCUMULATION 2 #define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" -#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" -#include "fsr3upscaler/ffx_fsr3upscaler_depth_clip.h" +#include "fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h" #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH #define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 @@ -57,11 +56,7 @@ FFX_PREFER_WAVE64 FFX_FSR3UPSCALER_NUM_THREADS FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT -void CS( - int2 iGroupId : SV_GroupID, - int2 iDispatchThreadId : SV_DispatchThreadID, - int2 iGroupThreadId : SV_GroupThreadID, - int iGroupIndex : SV_GroupIndex) +void CS(int2 iDispatchThreadId : 
SV_DispatchThreadID) { - DepthClip(iDispatchThreadId); + PrepareReactivity(iDispatchThreadId); } diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta new file mode 100644 index 0000000..cc6546c --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 38d0f427c34ecfa4bad25b31adc7d0f8 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl index bebdeb3..2963562 100644 --- a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,14 +20,13 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT 1 -#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 -#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT 1 - -#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 -#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 -#define FSR3UPSCALER_BIND_CB_RCAS 1 +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_RCAS 1 #include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" #include "fsr3upscaler/ffx_fsr3upscaler_common.h" diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl new file mode 100644 index 0000000..ab2b545 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl @@ -0,0 +1,52 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_SPD_MIPS 0 + +#define FSR3UPSCALER_BIND_UAV_SHADING_CHANGE 0 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_shading_change.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + ShadingChange(iDispatchThreadId); +} diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta new file mode 100644 index 0000000..862a490 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta @@ -0,0 
+1,7 @@ +fileFormatVersion: 2 +guid: 6edf1f8a51b98f84d858abb0cefb255f +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl new file mode 100644 index 0000000..5403792 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl @@ -0,0 +1,63 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 3 + + +#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 0 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 1 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 2 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 3 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 6 + + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_SPD 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeShadingChangePyramid(WorkGroupId, LocalThreadIndex); +} diff --git a/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..eea86ed --- /dev/null +++ 
b/Assets/Shaders/FSR3/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 3fd145d1d6d4dfd4d9441cee231689bd +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_common_types.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_common_types.h index f0b62ab..2c4f0ba 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_common_types.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_common_types.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- #ifndef FFX_COMMON_TYPES_H #define FFX_COMMON_TYPES_H @@ -106,7 +106,42 @@ typedef float FfxFloat32x3[3]; /// @ingroup CPUTypes typedef float FfxFloat32x4[4]; -/// A typedef for a 2-dimensional 32bit unsigned integer. +/// A typedef for a 2x2 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2x2[4]; + +/// A typedef for a 3x3 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x3[9]; + +/// A typedef for a 3x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x4[12]; + +/// A typedef for a 4x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4x4[16]; + +/// A typedef for a 2-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x4[4]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. /// /// @ingroup CPUTypes typedef uint32_t FfxUInt32x2[2]; @@ -161,6 +196,7 @@ typedef float32_t4 FfxFloat32x4; /// A [cacao_placeholder] typedef for matrix type until confirmed. typedef float4x4 FfxFloat32x4x4; +typedef float3x4 FfxFloat32x3x4; typedef float3x3 FfxFloat32x3x3; typedef float2x2 FfxFloat32x2x2; @@ -218,6 +254,7 @@ typedef int32_t4 FfxInt32x4; /// A [cacao_placeholder] typedef for matrix type until confirmed. 
#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x4 float3x4 #define FfxFloat32x3x3 float3x3 #define FfxFloat32x2x2 float2x2 @@ -260,29 +297,6 @@ typedef int16_t FfxInt16; typedef int16_t2 FfxInt16x2; typedef int16_t3 FfxInt16x3; typedef int16_t4 FfxInt16x4; -#elif SHADER_API_PSSL -#pragma argument(realtypes) // Enable true 16-bit types - -typedef half FfxFloat16; -typedef half2 FfxFloat16x2; -typedef half3 FfxFloat16x3; -typedef half4 FfxFloat16x4; - -/// A typedef for an unsigned 16bit integer. -/// -/// @ingroup GPU -typedef ushort FfxUInt16; -typedef ushort2 FfxUInt16x2; -typedef ushort3 FfxUInt16x3; -typedef ushort4 FfxUInt16x4; - -/// A typedef for a signed 16bit integer. -/// -/// @ingroup GPU -typedef short FfxInt16; -typedef short2 FfxInt16x2; -typedef short3 FfxInt16x3; -typedef short4 FfxInt16x4; #else // #if FFX_HLSL_SM>=62 typedef min16float FfxFloat16; typedef min16float2 FfxFloat16x2; @@ -334,6 +348,7 @@ typedef min16int4 FfxInt16x4; /// A [cacao_placeholder] typedef for matrix type until confirmed. 
#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x4 mat4x3 #define FfxFloat32x3x3 mat3 #define FfxFloat32x2x2 mat2 @@ -357,7 +372,7 @@ typedef min16int4 FfxInt16x4; // #define FFX_HALF (1) // #define FFX_HLSL_SM (62) -#if FFX_HALF && !defined(SHADER_API_PSSL) +#if FFX_HALF #if FFX_HLSL_SM >= 62 @@ -395,7 +410,7 @@ typedef min16int4 FfxInt16x4; #if defined(FFX_GPU) // Common typedefs: -#if defined(FFX_HLSL) && !defined(SHADER_API_PSSL) +#if defined(FFX_HLSL) FFX_MIN16_SCALAR( FFX_MIN16_F , float ); FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); @@ -469,79 +484,6 @@ typedef FfxUInt32x3 Prefix##_U3; \ typedef FfxUInt32x4 Prefix##_U4; #endif // #if defined(FFX_HLSL) -#if defined(SHADER_API_PSSL) - -#define unorm -#define globallycoherent - -#if FFX_HALF - -#define FFX_MIN16_F half -#define FFX_MIN16_F2 half2 -#define FFX_MIN16_F3 half3 -#define FFX_MIN16_F4 half4 - -#define FFX_MIN16_I short -#define FFX_MIN16_I2 short2 -#define FFX_MIN16_I3 short3 -#define FFX_MIN16_I4 short4 - -#define FFX_MIN16_U ushort -#define FFX_MIN16_U2 ushort2 -#define FFX_MIN16_U3 ushort3 -#define FFX_MIN16_U4 ushort4 - -#define FFX_16BIT_F half -#define FFX_16BIT_F2 half2 -#define FFX_16BIT_F3 half3 -#define FFX_16BIT_F4 half4 - -#define FFX_16BIT_I short -#define FFX_16BIT_I2 short2 -#define FFX_16BIT_I3 short3 -#define FFX_16BIT_I4 short4 - -#define FFX_16BIT_U ushort -#define FFX_16BIT_U2 ushort2 -#define FFX_16BIT_U3 ushort3 -#define FFX_16BIT_U4 ushort4 - -#else // FFX_HALF - -#define FFX_MIN16_F float -#define FFX_MIN16_F2 float2 -#define FFX_MIN16_F3 float3 -#define FFX_MIN16_F4 float4 - -#define FFX_MIN16_I int -#define FFX_MIN16_I2 int2 -#define FFX_MIN16_I3 int3 -#define FFX_MIN16_I4 int4 - -#define FFX_MIN16_U uint -#define FFX_MIN16_U2 uint2 -#define FFX_MIN16_U3 uint3 -#define FFX_MIN16_U4 uint4 - -#define FFX_16BIT_F float -#define FFX_16BIT_F2 float2 -#define FFX_16BIT_F3 float3 -#define FFX_16BIT_F4 float4 - -#define FFX_16BIT_I int 
-#define FFX_16BIT_I2 int2 -#define FFX_16BIT_I3 int3 -#define FFX_16BIT_I4 int4 - -#define FFX_16BIT_U uint -#define FFX_16BIT_U2 uint2 -#define FFX_16BIT_U3 uint3 -#define FFX_16BIT_U4 uint4 - -#endif // FFX_HALF - -#endif // #if defined(SHADER_API_PSSL) - #if defined(FFX_GLSL) #if FFX_HALF diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core.h index 02f6b3f..d1ed144 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- /// @defgroup FfxGPU GPU /// The FidelityFX SDK GPU References /// diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h deleted file mode 100644 index 865258d..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h +++ /dev/null @@ -1,338 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -/// A define for a true value in a boolean expression. -/// -/// @ingroup CPUTypes -#define FFX_TRUE (1) - -/// A define for a false value in a boolean expression. -/// -/// @ingroup CPUTypes -#define FFX_FALSE (0) - -#if !defined(FFX_STATIC) -/// A define to abstract declaration of static variables and functions. 
-/// -/// @ingroup CPUTypes -#define FFX_STATIC static -#endif // #if !defined(FFX_STATIC) - -/// @defgroup CPUCore CPU Core -/// Core CPU-side defines and functions -/// -/// @ingroup ffxHost - -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wunused-variable" -#endif - -/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. -/// -/// @param [in] x A 32bit floating value. -/// -/// @returns -/// An unsigned 32bit integer value containing the bit pattern of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) -{ - union - { - FfxFloat32 f; - FfxUInt32 u; - } bits; - - bits.f = x; - return bits.u; -} - -FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) -{ - return a[0] * b[0] + a[1] * b[1]; -} - -FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) -{ - return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; -} - -FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) -{ - return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; -} - -/// Compute the linear interopation between two values. -/// -/// Implemented by calling the GLSL mix instrinsic function. Implements the -/// following math: -/// -/// (1 - t) * x + t * y -/// -/// @param [in] x The first value to lerp between. -/// @param [in] y The second value to lerp between. -/// @param [in] t The value to determine how much of x and how much of y. -/// -/// @returns -/// A linearly interpolated value between x and y according to t. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) -{ - return y * t + (-x * t + x); -} - -/// Compute the reciprocal of a value. -/// -/// @param [in] x The value to compute the reciprocal for. -/// -/// @returns -/// The reciprocal value of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x) -{ - return 1.0f / x; -} - -/// Compute the square root of a value. 
-/// -/// @param [in] x The first value to compute the min of. -/// -/// @returns -/// The the square root of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) -{ - return sqrt(x); -} - -FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) -{ - return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); -} - -/// Compute the factional part of a decimal value. -/// -/// This function calculates x - floor(x). -/// -/// @param [in] x The value to compute the fractional part from. -/// -/// @returns -/// The fractional part of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x) -{ - return x - floor(x); -} - -/// Compute the reciprocal square root of a value. -/// -/// @param [in] x The value to compute the reciprocal for. -/// -/// @returns -/// The reciprocal square root value of x. -/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 x) -{ - return ffxReciprocal(ffxSqrt(x)); -} - -FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) -{ - return x < y ? x : y; -} - -FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) -{ - return x < y ? x : y; -} - -FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) -{ - return x > y ? x : y; -} - -FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) -{ - return x > y ? x : y; -} - -/// Clamp a value to a [0..1] range. -/// -/// @param [in] x The value to clamp to [0..1] range. -/// -/// @returns -/// The clamped version of x. 
-/// -/// @ingroup CPUCore -FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x) -{ - return ffxMin(1.0f, ffxMax(0.0f, x)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d[0] = a[0] + b; - d[1] = a[1] + b; - d[2] = a[2] + b; - return; -} - -FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d[0] = a[0]; - d[1] = a[1]; - d[2] = a[2]; - return; -} - -FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) -{ - d[0] = a[0] * b[0]; - d[1] = a[1] * b[1]; - d[2] = a[2] * b[2]; - return; -} - -FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) -{ - d[0] = a[0] * b; - d[1] = a[1] * b; - d[2] = a[2] * b; - return; -} - -FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) -{ - d[0] = ffxReciprocal(a[0]); - d[1] = ffxReciprocal(a[1]); - d[2] = ffxReciprocal(a[2]); - return; -} - -/// Convert FfxFloat32 to half (in lower 16-bits of output). -/// -/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf -/// -/// The function supports denormals. -/// -/// Some conversion rules are to make computations possibly "safer" on the GPU, -/// -INF & -NaN -> -65504 -/// +INF & +NaN -> +65504 -/// -/// @param [in] f The 32bit floating point value to convert. -/// -/// @returns -/// The closest 16bit floating point value to f. 
-/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) -{ - static FfxUInt16 base[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, - 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, - 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, - 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, - 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, - 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, - 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff - }; - - static FfxUInt8 shift[512] = { - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 - }; - - union - { - FfxFloat32 f; - FfxUInt32 u; - } bits; - - bits.f = f; - FfxUInt32 u = bits.u; - FfxUInt32 i = u >> 23; - return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); -} - -/// Pack 2x32-bit floating point values in a single 32bit value. -/// -/// This function first converts each component of value into their nearest 16-bit floating -/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the -/// 32bit unsigned integer respectively. 
-/// -/// @param [in] x A 2-dimensional floating point value to convert and pack. -/// -/// @returns -/// A packed 32bit value containing 2 16bit floating point values. -/// -/// @ingroup CPUCore -FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 x) -{ - return f32tof16(x[0]) + (f32tof16(x[1]) << 16); -} diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta deleted file mode 100644 index f6508bc..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: 4c88c0b7a4dec1e479272449c19ca981 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h index 2f687df..9f88c94 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h @@ -1,16 +1,17 @@ // This file is part of 
the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - /// A define for a true value in a boolean expression. /// /// @ingroup GPUCore @@ -49,57 +49,7 @@ FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32 ffxReciprocal(FfxFloat32 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. 
-/// -/// @ingroup GPUCore -FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPUCore -FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) -{ - return rcp(value); -} +#define FFX_HAS_FLAG(v, f) ((v & f) == f) /// Compute the min of two values. /// @@ -782,7 +732,7 @@ FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) /// @ingroup GPUCore FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) { - return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); + return value ^ ((ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); } /// Convert a sortable integer to a 32bit floating point value. @@ -799,7 +749,7 @@ FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) /// @ingroup GPUCore FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) { - return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); + return value ^ ((~ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); } /// Calculate a low-quality approximation for the square root of a value. @@ -2408,6 +2358,51 @@ FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); } +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + /// Compute a gamma value from a linear value. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. @@ -2462,232 +2457,187 @@ FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) return pow(value, ffxBroadcast3(power)); } -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. +/// Compute a linear value from a value in a gamma space. 
/// -/// @ingroup GPUCore -FfxFloat32 ffxPQToLinear(FfxFloat32 value) -{ - FfxFloat32 p = pow(value, FfxFloat32(0.159302)); - return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); -} - -/// Compute a PQ value from a linear value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to PQ from linear. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. /// /// @returns /// A value in linear space. /// /// @ingroup GPUCore -FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value) +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) { - FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); - return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); + return pow(color, FfxFloat32(power)); } -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. +/// Compute a linear value from a value in a gamma space. /// -/// @ingroup GPUCore -FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value) -{ - FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); - return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); -} - -/// Compute a linear value from a SRGB value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to linear from SRGB. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. /// /// @returns -/// A value in SRGB space. +/// A value in linear space. 
/// /// @ingroup GPUCore -FfxFloat32 ffxSrgbToLinear(FfxFloat32 value) +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) { - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); + return pow(color, ffxBroadcast2(power)); } -/// Compute a linear value from a SRGB value. -/// -/// @param [in] value The value to convert to linear from SRGB. -/// -/// @returns -/// A value in SRGB space. +/// Compute a linear value from a value in a gamma space. /// -/// @ingroup GPUCore -FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value) -{ - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); -} - -/// Compute a linear value from a SRGB value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to linear from SRGB. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. /// /// @returns -/// A value in SRGB space. +/// A value in linear space. /// /// @ingroup GPUCore -FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value) +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) { - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); + return pow(color, ffxBroadcast3(power)); } -/// Compute a linear value from a REC.709 value. +/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. 
/// /// @ingroup GPUCore -FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +FfxFloat32 ffxPQFromLinear(FfxFloat32 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); + FfxFloat32 p = pow(value, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); } -/// Compute a linear value from a REC.709 value. +/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. /// /// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +FfxFloat32x2 ffxPQFromLinear(FfxFloat32x2 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); } -/// Compute a linear value from a REC.709 value. +/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. 
/// /// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +FfxFloat32x3 ffxPQFromLinear(FfxFloat32x3 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. /// /// @ingroup GPUCore -FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) { - return pow(color, FfxFloat32(power)); + FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. 
/// /// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) { - return pow(color, ffxBroadcast2(power)); + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. /// /// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) { - return pow(color, ffxBroadcast3(power)); + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); } -/// Compute a linear value from a value in a PQ space. +/// Compute an SRGB value from a linear value. /// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. 
/// /// @ingroup GPUCore -FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) +FfxFloat32 ffxSrgbFromLinear(FfxFloat32 value) { - FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); - return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); } -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// Compute an SRGB value from a linear value. /// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. /// /// @ingroup GPUCore -FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) +FfxFloat32x2 ffxSrgbFromLinear(FfxFloat32x2 value) { - FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); } -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// Compute an SRGB value from a linear value. /// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. 
/// /// @ingroup GPUCore -FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) +FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value) { - FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); } /// Compute a linear value from a value in a SRGB space. @@ -2742,11 +2692,13 @@ FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) } /// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. -/// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y +/// +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y /// /// @param [in] a The input 1D coordinates to remap. /// @@ -2756,7 +2708,7 @@ FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) /// @ingroup GPUCore FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) { - return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); + return FfxUInt32x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); } /// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 
@@ -2780,5 +2732,5 @@ FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) /// @ingroup GPUCore FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) { - return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); + return FfxUInt32x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); } diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h index 4c73daf..1cb780b 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - #if FFX_HALF #if FFX_HLSL_SM >= 62 /// A define value for 16bit positive infinity. @@ -563,7 +563,7 @@ FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) /// @ingroup GPUCore FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) { - return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -586,7 +586,7 @@ FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) /// @ingroup GPUCore FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) { - return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -609,7 +609,7 @@ FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) /// @ingroup GPUCore FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) { - return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -632,7 +632,7 @@ FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) /// @ingroup GPUCore FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) { - return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -650,7 +650,7 @@ FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) /// @ingroup GPUCore FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) 
{ - return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -668,7 +668,7 @@ FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) /// @ingroup GPUCore FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) { - return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -686,7 +686,7 @@ FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) /// @ingroup GPUCore FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) { - return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); } /// A single operation to return the following: @@ -704,7 +704,7 @@ FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) /// @ingroup GPUCore FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) { - return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); } /// Convert a 16bit floating point value to sortable integer. 
@@ -2223,7 +2223,7 @@ FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) /// @ingroup GPUCore FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { - return FfxFloat16(ffxSaturate(x * y + z)); + return ffxSaturate(x * y + z); } /// Conditional free logic AND operation using two half-precision values followed by @@ -2239,7 +2239,7 @@ FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) /// @ingroup GPUCore FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) { - return FfxFloat16x2(ffxSaturate(x * y + z)); + return ffxSaturate(x * y + z); } /// Conditional free logic AND operation using two half-precision values followed by @@ -2255,7 +2255,7 @@ FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) /// @ingroup GPUCore FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) { - return FfxFloat16x3(ffxSaturate(x * y + z)); + return ffxSaturate(x * y + z); } /// Conditional free logic AND operation using two half-precision values followed by @@ -2271,7 +2271,7 @@ FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) /// @ingroup GPUCore FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) { - return FfxFloat16x4(ffxSaturate(x * y + z)); + return ffxSaturate(x * y + z); } /// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. @@ -2284,7 +2284,7 @@ FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) /// @ingroup GPUCore FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) { - return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. 
@@ -2297,7 +2297,7 @@ FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) /// @ingroup GPUCore FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) { - return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. @@ -2310,7 +2310,7 @@ FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) /// @ingroup GPUCore FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) { - return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. @@ -2323,7 +2323,7 @@ FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) /// @ingroup GPUCore FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) { - return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); } /// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. @@ -2508,7 +2508,7 @@ FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z /// @ingroup GPUCore FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) { - return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. 
@@ -2521,7 +2521,7 @@ FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) /// @ingroup GPUCore FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) { - return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. @@ -2534,7 +2534,7 @@ FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) /// @ingroup GPUCore FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) { - return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); } /// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. @@ -2547,7 +2547,7 @@ FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) /// @ingroup GPUCore FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) { - return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF))); + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); } /// Compute a Rec.709 color space. @@ -2936,10 +2936,12 @@ FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) /// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. /// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y /// /// @param [in] a The input 1D coordinates to remap. /// @@ -2949,7 +2951,7 @@ FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) /// @ingroup GPUCore FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) { - return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); + return FfxUInt16x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); } /// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. 
@@ -2973,7 +2975,7 @@ FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) /// @ingroup GPUCore FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) { - return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); + return FfxUInt16x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); } #endif // FFX_HALF diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h index 337eb06..28827d9 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - /// @defgroup HLSLCore HLSL Core /// HLSL core defines and functions /// @@ -32,6 +32,19 @@ #define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) #define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) +/// A define for abstracting select functionality for pre/post HLSL 21 +/// +/// @ingroup HLSLCore +#if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) select(cond, arg1, arg2) + +#else // #if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) cond ? arg1 : arg2 + +#endif // #if __HLSL_VERSION >= 2021 + /// A define for abstracting shared memory between shading languages. /// /// @ingroup HLSLCore @@ -40,13 +53,33 @@ /// A define for abstracting compute memory barriers between shading languages. /// /// @ingroup HLSLCore -#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync() /// A define for abstracting compute atomic additions between shading languages. /// /// @ingroup HLSLCore #define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) InterlockedAdd(x, y, r) + +/// A define for abstracting compute atomic OR between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_OR(x, y) InterlockedOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. 
+/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MIN(x, y) InterlockedMin(x, y) + +/// A define for abstracting compute atomic max between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MAX(x, y) InterlockedMax(x, y) + /// A define added to accept static markup on functions to aid CPU/GPU portability of code. /// /// @ingroup HLSLCore @@ -222,6 +255,24 @@ /// @ingroup HLSLCore #define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. +/// +/// @ingroup HLSLCore +#define ffxF32ToF16 f32tof16 + /// Pack 2x32-bit floating point values in a single 32bit value. /// /// This function first converts each component of value into their nearest 16-bit floating @@ -234,9 +285,9 @@ /// A packed 32bit value containing 2 16bit floating point values. /// /// @ingroup HLSLCore -FfxUInt32 packHalf2x16(FfxFloat32x2 value) +FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value) { - return f32tof16(value.x) | (f32tof16(value.y) << 16); + return ffxF32ToF16(value.x) | (ffxF32ToF16(value.y) << 16); } /// Broadcast a scalar value to a 2-dimensional floating point vector. @@ -299,9 +350,9 @@ FfxInt32x2 ffxBroadcast2(FfxInt32 value) /// A 3-dimensional signed integer vector with value in each component. /// /// @ingroup HLSLCore -FfxUInt32x3 ffxBroadcast3(FfxInt32 value) +FfxInt32x3 ffxBroadcast3(FfxInt32 value) { - return FfxUInt32x3(value, value, value); + return FfxInt32x3(value, value, value); } /// Broadcast a scalar value to a 4-dimensional signed integer vector. 
@@ -356,18 +407,18 @@ FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) return FfxUInt32x4(value, value, value, value); } -FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) { FfxUInt32 mask = (1u << bits) - 1; return (src >> off) & mask; } -FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) { return (ins & mask) | (src & (~mask)); } -FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +FfxUInt32 ffxBitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) { FfxUInt32 mask = (1u << bits) - 1; return (ins & mask) | (src & (~mask)); @@ -477,6 +528,110 @@ FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) return asfloat(x); } +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x) +{ + return rcp(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. 
+/// +/// @ingroup HLSLCore +FfxFloat32 ffxRsqrt(FfxFloat32 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x) +{ + return rsqrt(x); +} + /// Compute the linear interopation between two values. /// /// Implemented by calling the HLSL mix instrinsic function. Implements the @@ -745,6 +900,58 @@ FfxFloat32x4 ffxFract(FfxFloat32x4 x) return x - floor(x); } +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. 
+/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return round(x); +} + /// Compute the maximum of three values. /// /// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. @@ -1158,13 +1365,13 @@ FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) } -FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) { return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); } FfxUInt32 ffxPackF32(FfxFloat32x2 v){ - FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); return p.x | (p.y << 16); } @@ -1172,6 +1379,14 @@ FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); } +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + //============================================================================================================================== // HLSL HALF //============================================================================================================================== @@ -1197,11 +1412,19 @@ FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), 
ffxUint32ToUint16x2(x.y)); } +FfxUInt32x2 ffxFloat16x4ToUint32x2(FFX_MIN16_F4 v) +{ + FfxUInt32x2 result; + result.x = ffxF32ToF16(v.x) | (ffxF32ToF16(v.y) << 16); + result.y = ffxF32ToF16(v.z) | (ffxF32ToF16(v.w) << 16); + return result; +} + /// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. /// @param v Value to invert. /// @return If v = 0 returns 0. If v != 0 returns 1/v. FfxFloat32 ffxInvertSafe(FfxFloat32 v){ - FfxFloat32 s = sign(v); + FfxFloat32 s = FfxFloat32(sign(v)); FfxFloat32 s2 = s*s; return s2/(v + s2 - 1.0); } @@ -1210,7 +1433,7 @@ FfxFloat32 ffxInvertSafe(FfxFloat32 v){ /// @param v Value to invert. /// @return If v = 0 returns 0. If v != 0 returns 1/v. FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ - FfxFloat32x2 s = sign(v); + FfxFloat32x2 s = FfxFloat32x2(sign(v)); FfxFloat32x2 s2 = s*s; return s2/(v + s2 - FfxFloat32x2(1.0, 1.0)); } @@ -1219,7 +1442,7 @@ FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ /// @param v Value to invert. /// @return If v = 0 returns 0. If v != 0 returns 1/v. FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ - FfxFloat32x3 s = sign(v); + FfxFloat32x3 s = FfxFloat32x3(sign(v)); FfxFloat32x3 s2 = s*s; return s2/(v + s2 - FfxFloat32x3(1.0, 1.0, 1.0)); } @@ -1228,7 +1451,7 @@ FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ /// @param v Value to invert. /// @return If v = 0 returns 0. If v != 0 returns 1/v. 
FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ - FfxFloat32x4 s = sign(v); + FfxFloat32x4 s = FfxFloat32x4(sign(v)); FfxFloat32x4 s2 = s*s; return s2/(v + s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)); } @@ -1241,7 +1464,7 @@ FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ #define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) FfxUInt32 ffxPackF16(FfxFloat16x2 v){ - FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); return p.x | (p.y << 16); } @@ -1252,7 +1475,7 @@ FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ //------------------------------------------------------------------------------------------------------------------------------ FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) { - return f32tof16(x.x) + (f32tof16(x.y) << 16); + return ffxF32ToF16(x.x) + (ffxF32ToF16(x.y) << 16); } FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) { @@ -1277,7 +1500,7 @@ FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) #define FFX_TO_UINT16X3(x) asuint16(x) #define FFX_TO_UINT16X4(x) asuint16(x) #else -#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a))) +#define FFX_TO_UINT16(a) FFX_MIN16_U(ffxF32ToF16(FfxFloat32(a))) #define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) #define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) #define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) @@ -1537,95 +1760,119 @@ FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b) //============================================================================================================================== #if defined(FFX_WAVE) // Where 'x' must be a compile time literal. 
-FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +FfxUInt32x2 ffxWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +FfxUInt32x3 ffxWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +FfxUInt32x4 ffxWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxBoolean AWaveIsFirstLane() +FfxBoolean ffxWaveIsFirstLane() { return WaveIsFirstLane(); } -FfxUInt32 AWaveLaneIndex() +FfxUInt32 ffxWaveLaneIndex() { return WaveGetLaneIndex(); } -FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) { return WaveReadLaneAt(v, x); } -FfxUInt32 AWavePrefixCountBits(FfxBoolean v) +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) { return WavePrefixCountBits(v); } -FfxUInt32 AWaveActiveCountBits(FfxBoolean v) +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) { return WaveActiveCountBits(v); } -FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v) +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return 
WaveReadLaneFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) { return WaveReadLaneFirst(v); } -FfxUInt32 WaveOr(FfxUInt32 a) +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32 ffxWaveOr(FfxUInt32 a) { return WaveActiveBitOr(a); } -FfxFloat32 WaveMin(FfxFloat32 a) +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return WaveActiveMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) { return WaveActiveMin(a); } -FfxFloat32 WaveMax(FfxFloat32 a) +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return WaveActiveMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) { return WaveActiveMax(a); } -FfxUInt32 WaveLaneCount() +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return WaveActiveSum(a); +} +FfxFloat32 ffxWaveSum(FfxFloat32 a) +{ + return WaveActiveSum(a); +} +FfxUInt32 ffxWaveLaneCount() { return WaveGetLaneCount(); } -FfxBoolean WaveAllTrue(FfxBoolean v) +FfxBoolean ffxWaveAllTrue(FfxBoolean v) { return WaveActiveAllTrue(v); } -FfxFloat32 QuadReadX(FfxFloat32 v) +FfxFloat32 ffxQuadReadX(FfxFloat32 v) { return QuadReadAcrossX(v); } -FfxFloat32x2 QuadReadX(FfxFloat32x2 v) +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) { return QuadReadAcrossX(v); } -FfxFloat32 QuadReadY(FfxFloat32 v) +FfxFloat32 ffxQuadReadY(FfxFloat32 v) { return QuadReadAcrossY(v); } -FfxFloat32x2 QuadReadY(FfxFloat32x2 v) +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) { return QuadReadAcrossY(v); } diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_portability.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_portability.h index 84a62d6..12147b9 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_portability.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_core_portability.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,33 +20,27 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- -FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +void ffxOpAAddOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { d = a + ffxBroadcast3(b); - return d; } -FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +void ffxOpACpyF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) { d = a; - return d; } -FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +void ffxOpAMulF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) { d = a * b; - return d; } -FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +void ffxOpAMulOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { - d = a * ffxBroadcast3(b); - return d; + d = a * b; } -FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +void ffxOpARcpF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) { - d = rcp(a); - return d; + d = ffxReciprocal(a); } diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h index c425de7..0e728d5 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,270 +20,152 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- -#ifndef FFX_FSR3UPSCALER_ACCUMULATE_H -#define FFX_FSR3UPSCALER_ACCUMULATE_H - -FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - return length(fMotionVector * DisplaySize()); -} -#if FFX_HALF -FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) -{ - return length(fMotionVector * FFX_MIN16_F2(DisplaySize())); -} -#endif - -void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) -{ - // Aviod invalid values when accumulation and upsampled weight is 0 - fAccumulation = ffxMax(FSR3UPSCALER_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); + // Avoid invalid values when accumulation and upsampled weight is 0 + data.fHistoryWeight *= FfxFloat32(data.fHistoryWeight > FSR3UPSCALER_FP16_MIN); + data.fHistoryWeight = ffxMax(FSR3UPSCALER_EPSILON, data.fHistoryWeight + data.fUpsampledWeight); #if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) - fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz))); - fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor))); + data.fUpsampledColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fUpsampledColor))); + data.fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fHistoryColor))); #endif - const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; - fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha); - - fHistoryColor = YCoCgToRGB(fHistoryColor); + const FfxFloat32 fAlpha = ffxSaturate(data.fUpsampledWeight / data.fHistoryWeight); + data.fHistoryColor = ffxLerp(data.fHistoryColor, data.fUpsampledColor, fAlpha); + data.fHistoryColor = YCoCgToRGB(data.fHistoryColor); #if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fHistoryColor 
= InverseTonemap(fHistoryColor); + data.fHistoryColor = InverseTonemap(data.fHistoryColor); #endif } void RectifyHistory( const AccumulationPassCommonParams params, - RectificationBox clippingBox, - FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, - FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation, - FfxFloat32 fLockContributionThisFrame, - FfxFloat32 fTemporalReactiveFactor, - FfxFloat32 fLumaInstabilityFactor) + FFX_PARAMETER_INOUT AccumulationPassData data +) { - const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); - const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); + const FfxFloat32 fVecolityFactor = ffxSaturate(params.f4KVelocity / 20.0f); + const FfxFloat32 fDistanceFactor = ffxSaturate(0.75f - params.fFarthestDepthInMeters / 20.0f); + const FfxFloat32 fAccumulationFactor = 1.0f - params.fAccumulation; + const FfxFloat32 fReactiveFactor = ffxPow(params.fReactiveMask, 1.0f / 2.0f); + const FfxFloat32 fShadingChangeFactor = params.fShadingChange; + const FfxFloat32 fBoxScaleT = ffxMax(fVecolityFactor, ffxMax(fDistanceFactor, ffxMax(fAccumulationFactor, ffxMax(fReactiveFactor, fShadingChangeFactor)))); + const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); - const FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; - const FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec; - const FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec; + const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * fBoxScale; + const FfxFloat32x3 fBoxMin = data.clippingBox.boxCenter - fScaledBoxVec; + const FfxFloat32x3 fBoxMax = data.clippingBox.boxCenter + fScaledBoxVec; - if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) { + if (any(FFX_GREATER_THAN(fBoxMin, data.fHistoryColor)) || any(FFX_GREATER_THAN(data.fHistoryColor, fBoxMax))) { - const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, 
boxMax); + const FfxFloat32x3 fClampedHistoryColor = clamp(data.fHistoryColor, fBoxMin, fBoxMax); - FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx; - - const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor; - const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); - fHistoryContribution *= fReactiveContribution; + const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion); // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection - fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); - - // Scale accumulation using rectification info - const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)); - fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution)); + data.fHistoryColor = ffxLerp(fClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution)); } } -void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +void UpdateLockStatus(AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - StoreUpscaledOutput(iPxHrPos, fUpscaledColor); -} + data.fLock *= FfxFloat32(params.bIsNewSample == false); -void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight) -{ - // we expect similar motion for next frame - // kill lock if that location is outside screen, avoid locks to be clamped to screen borders - FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; - if (IsUvInside(fEstimatedUvNextFrame) == false) { - KillLock(fLockStatus); - } - else { - // Decrease lock lifetime - const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * 
FfxFloat32(fAverageLanczosWeightPerFrame); - const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); - fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); - } - - StoreLockStatus(params.iPxHrPos, fLockStatus); -} + const FfxFloat32 fLifetimeDecreaseFactor = ffxMax(ffxSaturate(params.fShadingChange), ffxMax(params.fReactiveMask, params.fDisocclusion)); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecreaseFactor * fLockMax); + // Compute this frame lock contribution + data.fLockContributionThisFrame = ffxSaturate(ffxSaturate(data.fLock - fLockThreshold) * (fLockMax - fLockThreshold)); -FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) -{ - // Always assume max accumulation was reached - FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); - - fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos) * (1.0f - ffxMax(params.fShadingChange * 0, params.fReactiveMask)); + data.fLock = ffxMax(0.0f, ffxMin(data.fLock + fNewLockIntensity, fLockMax)); - fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); + // Preparing for next frame + const FfxFloat32 fLifetimeDecrease = (0.1f / JitterSequenceLength()) * (1.0f - fLifetimeDecreaseFactor); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecrease); - return fBaseAccumulation.xxx; + // we expect similar motion for next frame + // kill lock if that location 
is outside screen, avoid locks to be clamped to screen borders + const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + data.fLock *= FfxFloat32(IsUvInside(fEstimatedUvNextFrame) == true); } -FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff) +void ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - const FfxFloat32 fUnormThreshold = 1.0f / 255.0f; - const FfxInt32 N_MINUS_1 = 0; - const FfxInt32 N_MINUS_2 = 1; - const FfxInt32 N_MINUS_3 = 2; - const FfxInt32 N_MINUS_4 = 3; - - FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); -#endif - - fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; - - const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); - FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? 
SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f); - - FfxFloat32 fLumaInstability = 0.0f; - FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]); - - FfxFloat32 fMin = abs(fDiffs0); + FfxFloat32 fBaseAccumulation = params.fAccumulation; - if (fMin >= fUnormThreshold) { - for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { - FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, params.f4KVelocity / 0.5f)))); - if (sign(fDiffs0) == sign(fDiffs1)) { - - // Scale difference to protect historically similar values - const FfxFloat32 fMinBias = 1.0f; - fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias); - } - } - - const FfxFloat32 fBoxSize = clippingBox.boxVec.x; - const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f); - - fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor; - fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold); - - fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f)); - } - - //shift history - fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3]; - fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2]; - fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1]; - fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma; - - StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory); - - return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); + data.fHistoryWeight = fBaseAccumulation; } -FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) +void InitPassData(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - FfxFloat32 fNewFactor = ffxMin(0.99f, 
fTemporalReactiveFactor); - - fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); - - fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); - - // Force reactive factor for new samples - fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor; - - if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) { - fNewFactor = ffxMax(FSR3UPSCALER_EPSILON, fNewFactor) * -1.0f; - } - - return fNewFactor; + // Init constant params + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / UpscaleSize(); + params.fHrUv = fHrUv; + params.fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(params.fLrUvJittered, RenderSize(), MaxRenderSize()); + + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.f4KVelocity = Get4KVelocity(params.fMotionVector); + + ComputeReprojectedUVs(params); + + const FfxFloat32x2 fLumaInstabilityUv_HW = ClampUv(fHrUv, RenderSize(), MaxRenderSize()); + params.fLumaInstabilityFactor = SampleLumaInstability(fLumaInstabilityUv_HW); + + const FfxFloat32x2 fFarthestDepthUv = ClampUv(params.fLrUvJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); + params.fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv); + params.bIsNewSample = (params.bIsExistingSample == false || 0 == FrameIndex()); + + const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]); + params.fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]); + params.fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]); + params.fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); + params.fAccumulation *= FfxFloat32(round(params.fAccumulation * 100.0f) > 1.0f); + + // Init variable data + data.fUpsampledColor = FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryColor = 
FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryWeight = 1.0f; + data.fUpsampledWeight = 0.0f; + data.fLock = 0.0f; + data.fLockContributionThisFrame = 0.0f; } -AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) +void Accumulate(FfxInt32x2 iPxHrPos) { AccumulationPassCommonParams params; + AccumulationPassData data; + InitPassData(iPxHrPos, params, data); - params.iPxHrPos = iPxHrPos; - const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); - params.fHrUv = fHrUv; - - const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); - params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize()); - - params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); - params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); - - ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); - - params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)); - - const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); - params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; - params.fAccumulationMask = fDilatedReactiveMasks.y; - params.bIsResetFrame = (0 == FrameIndex()); - - params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame); - - return params; -} - -void Accumulate(FfxInt32x2 iPxHrPos) -{ - const AccumulationPassCommonParams params = InitParams(iPxHrPos); - - FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); - FfxFloat32x2 fLockStatus; - InitializeNewLockSample(fLockStatus); - - FfxFloat32 fTemporalReactiveFactor = 0.0f; - FfxBoolean bInMotionLastFrame = FFX_FALSE; - LockState lockState = { FFX_FALSE , FFX_FALSE }; - if (params.bIsExistingSample && !params.bIsResetFrame) { - ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame); - lockState = ReprojectHistoryLockStatus(params, fLockStatus); + if (params.bIsExistingSample && !params.bIsNewSample) { + ReprojectHistoryColor(params, data); } - - 
FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); - - FfxFloat32 fLuminanceDiff = 0.0f; - FfxFloat32 fLockContributionThisFrame = 0.0f; - UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff); - - // Load upsampled input color - RectificationBox clippingBox; - FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); - const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff); - + UpdateLockStatus(params, data); - FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState); + ComputeBaseAccumulationWeight(params, data); - if (params.bIsNewSample) { - fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz); - } - else { - RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); - - Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight); - } + ComputeUpsampledColorAndWeight(params, data); - fHistoryColor = UnprepareRgb(fHistoryColor, Exposure()); + RectifyHistory(params, data); - FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w); + Accumulate(params, data); - // Get new temporal reactive factor - fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor); + data.fHistoryColor /= Exposure(); - StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor)); + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(data.fHistoryColor, data.fLock)); // Output final color when RCAS is disabled #if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0 - WriteUpscaledOutput(iPxHrPos, fHistoryColor); + StoreUpscaledOutput(iPxHrPos, data.fHistoryColor); #endif 
StoreNewLocks(iPxHrPos, 0); } - -#endif // FFX_FSR3UPSCALER_ACCUMULATE_H diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h new file mode 100644 index 0000000..685712a --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h @@ -0,0 +1,882 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_fsr3upscaler_resources.h" + +#if defined(FFX_GPU) +#include "ffx_core.h" +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // FFX_PREFER_WAVE64 + +#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + layout (set = 0, binding = FSR3UPSCALER_BIND_CB_FSR3UPSCALER, std140) uniform cbFSR3UPSCALER_t + { + FfxInt32x2 iRenderSize; + FfxInt32x2 iPreviousFrameRenderSize; + + FfxInt32x2 iUpscaleSize; + FfxInt32x2 iPreviousFrameUpscaleSize; + + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iMaxUpscaleSize; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fPreviousFrameJitter; + + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + + FfxFloat32 fDeltaTime; + FfxFloat32 fDeltaPreExposure; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fFrameIndex; + } cbFSR3Upscaler; + + +FfxInt32x2 RenderSize() +{ + return cbFSR3Upscaler.iRenderSize; +} + +FfxInt32x2 PreviousFrameRenderSize() +{ + return cbFSR3Upscaler.iPreviousFrameRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return cbFSR3Upscaler.iMaxRenderSize; +} + +FfxInt32x2 UpscaleSize() +{ + return cbFSR3Upscaler.iUpscaleSize; +} + +FfxInt32x2 PreviousFrameUpscaleSize() +{ + return cbFSR3Upscaler.iPreviousFrameUpscaleSize; +} + +FfxInt32x2 MaxUpscaleSize() +{ + return cbFSR3Upscaler.iMaxUpscaleSize; +} + +FfxFloat32x2 Jitter() +{ + return cbFSR3Upscaler.fJitter; +} + +FfxFloat32x2 PreviousFrameJitter() +{ + return cbFSR3Upscaler.fPreviousFrameJitter; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return cbFSR3Upscaler.fDeviceToViewDepth; +} + +FfxFloat32x2 MotionVectorScale() +{ + return cbFSR3Upscaler.fMotionVectorScale; +} + +FfxFloat32x2 DownscaleFactor() +{ + return cbFSR3Upscaler.fDownscaleFactor; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return 
cbFSR3Upscaler.fMotionVectorJitterCancellation; +} + +FfxFloat32 TanHalfFoV() +{ + return cbFSR3Upscaler.fTanHalfFOV; +} + +FfxFloat32 JitterSequenceLength() +{ + return cbFSR3Upscaler.fJitterSequenceLength; +} + +FfxFloat32 DeltaTime() +{ + return cbFSR3Upscaler.fDeltaTime; +} + +FfxFloat32 DeltaPreExposure() +{ + return cbFSR3Upscaler.fDeltaPreExposure; +} + +FfxFloat32 ViewSpaceToMetersFactor() +{ + return cbFSR3Upscaler.fViewSpaceToMetersFactor; +} + +FfxFloat32 FrameIndex() +{ + return cbFSR3Upscaler.fFrameIndex; +} + +#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + + +#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_AUTOREACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +} cbGenerateReactive; + +FfxFloat32 TcThreshold() +{ + return cbGenerateReactive.fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return cbGenerateReactive.fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return cbGenerateReactive.fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return cbGenerateReactive.fReactiveMax; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_RCAS) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_RCAS, std140) uniform cbRCAS_t +{ + FfxUInt32x4 rcasConfig; +} cbRCAS; + +FfxUInt32x4 RCASConfig() +{ + return cbRCAS.rcasConfig; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS) + + +#if defined(FSR3UPSCALER_BIND_CB_REACTIVE) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +} cbGenerateReactive; + +FfxFloat32 GenReactiveScale() +{ + return cbGenerateReactive.gen_reactive_scale; +} + +FfxFloat32 
GenReactiveThreshold() +{ + return cbGenerateReactive.gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return cbGenerateReactive.gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return cbGenerateReactive.gen_reactive_flags; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE) + + +#if defined(FSR3UPSCALER_BIND_CB_SPD) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_SPD, std140) uniform cbSPD_t +{ + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +} cbSPD; + +FfxUInt32 MipCount() +{ + return cbSPD.mips; +} + +FfxUInt32 NumWorkGroups() +{ + return cbSPD.numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return cbSPD.workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return cbSPD.renderSize; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) + +layout (set = 0, binding = 1000) uniform sampler s_PointClamp; +layout (set = 0, binding = 1001) uniform sampler s_LinearClamp; + +#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_SPD_MIPS) uniform texture2D r_spd_mips; + +FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel) +{ + return textureSize(r_spd_mips, int(uMipLevel)).xy; +} + +FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return textureLod(sampler2D(r_spd_mips, s_LinearClamp), fUV, float(mipLevel)).rg; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; + +FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) +{ + return texelFetch(r_input_depth, iPxPos, 0).r; +} + +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_depth, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; + +FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) +{ 
+ return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; +} + +FfxInt32x2 GetReactiveMaskResourceDimensions() +{ + return textureSize(r_reactive_mask, 0).xy; +} + +FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_reactive_mask, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask; + +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) +{ + return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r; +} + +FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions() +{ + return textureSize(r_transparency_and_composition_mask, 0).xy; +} + +FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; + +FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos) +{ + return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; +} + +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; + +FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} 
+#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color; + +FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory) +{ + return texelFetch(r_internal_upscaled_color, iPxHistory, 0); +} + +FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_internal_upscaled_color, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; + +void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) +{ + imageStore(rw_luma_history, iPxPos, fLumaHistory); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history; + +FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos) +{ + return texelFetch(r_luma_history, iPxPos, 0); +} + +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input; + +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ + return texelFetch(r_rcas_input, iPxPos, 0); +} + +FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_rcas_input, s_LinearClamp), fUV, 0.0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; + +void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + imageStore(rw_internal_upscaled_color, iPxHistory, fHistory); +} + +void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + imageStore(rw_internal_upscaled_color, iPxPos, 
fColorAndWeight); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; + +void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + imageStore(rw_upscaled_output, iPxPos, FfxFloat32x4(fColor, 1.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_ACCUMULATION) uniform texture2D r_accumulation; + +FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_accumulation, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_ACCUMULATION, r8) uniform image2D rw_accumulation; + +void StoreAccumulation(FfxInt32x2 iPxPos, FfxFloat32 fAccumulation) +{ + imageStore(rw_accumulation, iPxPos, vec4(fAccumulation, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) uniform texture2D r_shading_change; + +FfxFloat32 LoadShadingChange(FfxInt32x2 iPxPos) +{ + return texelFetch(r_shading_change, iPxPos, 0).x; +} + +FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_shading_change, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SHADING_CHANGE, r8) uniform image2D rw_shading_change; + +void StoreShadingChange(FfxInt32x2 iPxPos, FfxFloat32 fShadingChange) +{ + imageStore(rw_shading_change, iPxPos, vec4(fShadingChange, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) uniform texture2D r_farthest_depth; + +FfxInt32x2 GetFarthestDepthResourceDimensions() +{ + return textureSize(r_farthest_depth, 0).xy; +} + +FfxFloat32 
LoadFarthestDepth(FfxInt32x2 iPxPos) +{ + return texelFetch(r_farthest_depth, iPxPos, 0).x; +} + +FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_farthest_depth, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH, r16f) uniform image2D rw_farthest_depth; + +void StoreFarthestDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth) +{ + imageStore(rw_farthest_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) uniform texture2D r_farthest_depth_mip1; + +FfxInt32x2 GetFarthestDepthMip1ResourceDimensions() +{ + return textureSize(r_farthest_depth_mip1, 0).xy; +} + +FfxFloat32 LoadFarthestDepthMip1(FfxInt32x2 iPxPos) +{ + return texelFetch(r_farthest_depth_mip1, iPxPos, 0).x; +} + +FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_farthest_depth_mip1, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1, r16f) uniform image2D rw_farthest_depth_mip1; + +void StoreFarthestDepthMip1(FfxInt32x2 iPxPos, FfxFloat32 fDepth) +{ + imageStore(rw_farthest_depth_mip1, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) uniform texture2D r_current_luma; + +FfxFloat32 LoadCurrentLuma(FfxInt32x2 iPxPos) +{ + return texelFetch(r_current_luma, iPxPos, 0).r; +} + +FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_current_luma, s_LinearClamp), uv, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_CURRENT_LUMA, r16f) uniform image2D rw_current_luma; + +void 
StoreCurrentLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma) +{ + imageStore(rw_current_luma, iPxPos, vec4(fLuma, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) uniform texture2D r_luma_instability; + +FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_luma_instability, s_LinearClamp), uv, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY, r16f) uniform image2D rw_luma_instability; + +void StoreLumaInstability(FfxInt32x2 iPxPos, FfxFloat32 fLumaInstability) +{ + imageStore(rw_luma_instability, iPxPos, vec4(fLumaInstability, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) uniform texture2D r_previous_luma; + +FfxFloat32 LoadPreviousLuma(FfxInt32x2 iPxPos) +{ + return texelFetch(r_previous_luma, iPxPos, 0).r; +} + +FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_previous_luma, s_LinearClamp), uv, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; + +FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos) +{ + return texelFetch(r_new_locks, iPxPos, 0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; + +FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos) +{ + return imageLoad(rw_new_locks, iPxPos).r; +} + +void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock) +{ + imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D 
r_reconstructed_previous_nearest_depth; + +FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos) +{ + return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; + +void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = floatBitsToUint(fDepth); + + #if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); + #else + imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth + #endif +} + +void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) +{ + imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilated_depth; + +void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ + imageStore(rw_dilated_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; + +void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ + imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; + +FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput) +{ + return 
texelFetch(r_dilated_motion_vectors, iPxInput, 0).xy; +} + +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0).xy; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilated_depth; + +FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +{ + return texelFetch(r_dilated_depth, iPxInput, 0).r; +} + +FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_depth, s_LinearClamp), fUV, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; + +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x; + + if (exposure == 0.0) { + exposure = 1.0; + } + + return exposure; +} +#endif + +// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; +#endif + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) + return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0, 0.5), 0.0).x; +#else + return 0.f; +#endif +} +// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; + +FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS, rgba8) writeonly uniform image2D rw_dilated_reactive_masks; + +void 
StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks) +{ + imageStore(rw_dilated_reactive_masks, iPxPos, fDilatedReactiveMasks); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; + +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_opaque_only, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; + +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; + +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \ + defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) + +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; + +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + imageStore(rw_output_autoreactive, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.x), 0.0, 0.0, 0.0)); + + imageStore(rw_output_autocomposition, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.y), 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D 
rw_output_prev_color_pre_alpha; + +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_pre_alpha, iPxPos, FfxFloat32x4(color, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; + +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_post_alpha, iPxPos, FfxFloat32x4(color, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FRAME_INFO, rgba32f) uniform image2D rw_frame_info; + +FfxFloat32x4 LoadFrameInfo() +{ + return imageLoad(rw_frame_info, ivec2(0, 0)); +} + +void StoreFrameInfo(FfxFloat32x4 fInfo) +{ + imageStore(rw_frame_info, ivec2(0, 0), fInfo); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FRAME_INFO) uniform texture2D r_frame_info; + +FfxFloat32x4 FrameInfo() +{ + return texelFetch(r_frame_info, ivec2(0, 0), 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0, rg16f) uniform image2D rw_spd_mip0; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1, rg16f) uniform image2D rw_spd_mip1; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2, rg16f) uniform image2D rw_spd_mip2; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3, rg16f) uniform image2D rw_spd_mip3; +layout (set = 0, binding = 
FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4, rg16f) uniform image2D rw_spd_mip4; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5, rg16f) coherent uniform image2D rw_spd_mip5; + +FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define LOAD(idx) \ + if (index == idx) \ + { \ + return imageLoad(rw_spd_mip##idx, iPxPos).xy; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + + return FfxFloat32x2(0.0, 0.0); + +#undef LOAD +} + +void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define STORE(idx) \ + if (index == idx) \ + { \ + imageStore(rw_spd_mip##idx, iPxPos, vec4(outValue, 0.0, 0.0)); \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + +#undef STORE +} +#endif + +#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ + spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0, 0), 1); +} + +void SPD_ResetAtomicCounter() +{ + imageStore(rw_spd_global_atomic, ivec2(0, 0), uvec4(0)); +} +#endif + +#endif // #if defined(FFX_GPU) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h.meta new file mode 100644 index 0000000..fa5ae0d --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 792c5fd8e080de44a8d66c330e46d7c0 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + 
settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h index 13b317a..8465f1b 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- #include "ffx_fsr3upscaler_resources.h" #if defined(FFX_GPU) @@ -38,9 +38,7 @@ #define FFX_PREFER_WAVE64 #endif // FFX_PREFER_WAVE64 -#if defined(FFX_GPU) #pragma warning(disable: 3205) // conversion from larger type to smaller -#endif // #if defined(FFX_GPU) #define DECLARE_SRV_REGISTER(regIndex) t##regIndex #define DECLARE_UAV_REGISTER(regIndex) u##regIndex @@ -50,31 +48,34 @@ #define FFX_FSR3UPSCALER_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) - cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) - { - FfxInt32x2 iRenderSize; - FfxInt32x2 iMaxRenderSize; - FfxInt32x2 iDisplaySize; - FfxInt32x2 iInputColorResourceDimensions; - FfxInt32x2 iLumaMipDimensions; - FfxInt32 iLumaMipLevelToUse; - FfxInt32 iFrameIndex; - - FfxFloat32x4 fDeviceToViewDepth; - FfxFloat32x2 fJitter; - FfxFloat32x2 fMotionVectorScale; - FfxFloat32x2 fDownscaleFactor; - FfxFloat32x2 fMotionVectorJitterCancellation; - FfxFloat32 fPreExposure; - FfxFloat32 fPreviousFramePreExposure; - FfxFloat32 fTanHalfFOV; - FfxFloat32 fJitterSequenceLength; - FfxFloat32 fDeltaTime; - FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fViewSpaceToMetersFactor; - - FfxInt32 iDummy; - }; +cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) +{ + FfxInt32x2 iRenderSize; + FfxInt32x2 iPreviousFrameRenderSize; + + FfxInt32x2 iUpscaleSize; + FfxInt32x2 iPreviousFrameUpscaleSize; + + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iMaxUpscaleSize; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fPreviousFrameJitter; + + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + + FfxFloat32 fDeltaTime; + FfxFloat32 fDeltaPreExposure; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fFrameIndex; +}; #define 
FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size. @@ -84,39 +85,39 @@ FfxInt32x2 RenderSize() return iRenderSize; } -FfxInt32x2 MaxRenderSize() +FfxInt32x2 PreviousFrameRenderSize() { - return iMaxRenderSize; + return iPreviousFrameRenderSize; } -FfxInt32x2 DisplaySize() +FfxInt32x2 MaxRenderSize() { - return iDisplaySize; + return iMaxRenderSize; } -FfxInt32x2 InputColorResourceDimensions() +FfxInt32x2 UpscaleSize() { - return iInputColorResourceDimensions; + return iUpscaleSize; } -FfxInt32x2 LumaMipDimensions() +FfxInt32x2 PreviousFrameUpscaleSize() { - return iLumaMipDimensions; + return iPreviousFrameUpscaleSize; } -FfxInt32 LumaMipLevelToUse() +FfxInt32x2 MaxUpscaleSize() { - return iLumaMipLevelToUse; + return iMaxUpscaleSize; } -FfxInt32 FrameIndex() +FfxFloat32x2 Jitter() { - return iFrameIndex; + return fJitter; } -FfxFloat32x2 Jitter() +FfxFloat32x2 PreviousFrameJitter() { - return fJitter; + return fPreviousFrameJitter; } FfxFloat32x4 DeviceToViewSpaceTransformFactors() @@ -139,16 +140,6 @@ FfxFloat32x2 MotionVectorJitterCancellation() return fMotionVectorJitterCancellation; } -FfxFloat32 PreExposure() -{ - return fPreExposure; -} - -FfxFloat32 PreviousFramePreExposure() -{ - return fPreviousFramePreExposure; -} - FfxFloat32 TanHalfFoV() { return fTanHalfFOV; @@ -164,22 +155,28 @@ FfxFloat32 DeltaTime() return fDeltaTime; } -FfxFloat32 DynamicResChangeFactor() +FfxFloat32 DeltaPreExposure() { - return fDynamicResChangeFactor; + return fDeltaPreExposure; } FfxFloat32 ViewSpaceToMetersFactor() { return fViewSpaceToMetersFactor; } + +FfxFloat32 FrameIndex() +{ + return fFrameIndex; +} + #endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) #define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p) #define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p #define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " 
FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \ + "CBV(b0), " \ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ "addressU = TEXTURE_ADDRESS_CLAMP, " \ "addressV = TEXTURE_ADDRESS_CLAMP, " \ @@ -197,8 +194,8 @@ FfxFloat32 ViewSpaceToMetersFactor() #define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \ - "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE) ", b1), " \ + "CBV(b0), " \ + "CBV(b1), " \ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ "addressU = TEXTURE_ADDRESS_CLAMP, " \ "addressV = TEXTURE_ADDRESS_CLAMP, " \ @@ -322,231 +319,108 @@ FfxUInt32x2 SPD_RenderSize() } #endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) -// Declare and sample camera buffers as regular textures, unless overridden -#if !defined(UNITY_FSR3_TEX2D) -#define UNITY_FSR3_TEX2D(type) Texture2D -#endif -#if !defined(UNITY_FSR3_RWTEX2D) -#define UNITY_FSR3_RWTEX2D(type) RWTexture2D -#endif -#if !defined(UNITY_FSR3_POS) -#define UNITY_FSR3_POS(pxPos) (pxPos) -#endif -#if !defined(UNITY_FSR3_UV) -#define UNITY_FSR3_UV(uv) (uv) -#endif - SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); - // SRVs - #if defined FSR3UPSCALER_BIND_SRV_INPUT_COLOR - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_color_jittered : 
FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_opaque_only : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS - UNITY_FSR3_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_DEPTH - UNITY_FSR3_TEX2D(FfxFloat32) r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE - Texture2D r_input_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE - Texture2D r_auto_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_SRV_REACTIVE_MASK - UNITY_FSR3_TEX2D(FfxFloat32) r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK); - #endif - #if defined FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - UNITY_FSR3_TEX2D(FfxFloat32) r_transparency_and_composition_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); - #endif - #if defined FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS - Texture2D r_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS - Texture2D r_previous_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); - #endif - #if defined 
FSR3UPSCALER_BIND_SRV_DILATED_DEPTH - Texture2D r_dilated_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED - Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LOCK_STATUS - Texture2D r_lock_status : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_STATUS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA - Texture2D r_lock_input_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA); - #endif - #if defined FSR3UPSCALER_BIND_SRV_NEW_LOCKS - Texture2D r_new_locks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR - Texture2D r_prepared_input_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LUMA_HISTORY - Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY); - #endif - #if defined FSR3UPSCALER_BIND_SRV_RCAS_INPUT - Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_LANCZOS_LUT - Texture2D r_lanczos_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS - Texture2D r_imgMips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS); - #endif - #if defined FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT - Texture2D r_upsample_maximum_bias_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); - #endif - #if defined FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS - Texture2D r_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS); - #endif - - #if defined FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR - 
Texture2D r_input_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR - Texture2D r_input_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - #endif - - // UAV declarations - #if defined FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS - RWTexture2D rw_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_DEPTH - RWTexture2D rw_dilated_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH); - #endif - #if defined FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED - RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LOCK_STATUS - RWTexture2D rw_lock_status : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_STATUS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA - RWTexture2D rw_lock_input_luma : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA); - #endif - #if defined FSR3UPSCALER_BIND_UAV_NEW_LOCKS - RWTexture2D rw_new_locks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR - RWTexture2D rw_prepared_input_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_UAV_LUMA_HISTORY - RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY); - #endif - #if defined FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT - UNITY_FSR3_RWTEX2D(FfxFloat32x4) 
rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5); - #endif - #if defined FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS - RWTexture2D rw_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS); - #endif - #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE - RWTexture2D rw_exposure : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - RWTexture2D rw_auto_exposure : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC - globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC); - #endif - - #if defined FSR3UPSCALER_BIND_UAV_AUTOREACTIVE - RWTexture2D rw_output_autoreactive : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE); - #endif - #if defined FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION - RWTexture2D rw_output_autocomposition : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR - RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR); - #endif - #if defined FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR - RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR); - #endif - -#if defined(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS) -FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) -{ - 
return r_imgMips.mips[mipLevel][iPxPos]; -} -#endif - -#if defined(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS) -FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) -{ - return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); +#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS) +Texture2D r_spd_mips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SPD_MIPS); + +FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel) +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + FfxUInt32 uLevels; + r_spd_mips.GetDimensions(uMipLevel, uWidth, uHeight, uLevels); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return r_spd_mips.SampleLevel(s_LinearClamp, fUV, mipLevel); } #endif #if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) +Texture2D r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH); + FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) { - return r_input_depth[UNITY_FSR3_POS(iPxPos)]; + return r_input_depth[iPxPos]; } -#endif -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) { - return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).x; + return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; } #endif #if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) +Texture2D r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK); + FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) { - return r_reactive_mask[UNITY_FSR3_POS(iPxPos)]; + return r_reactive_mask[iPxPos]; +} + +FfxInt32x2 GetReactiveMaskResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_reactive_mask.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV) +{ + return r_reactive_mask.SampleLevel(s_LinearClamp, fUV, 0).x; } #endif #if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +Texture2D r_transparency_and_composition_mask : 
FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { - return r_transparency_and_composition_mask[UNITY_FSR3_POS(iPxPos)]; + return r_transparency_and_composition_mask[iPxPos]; } -#endif -#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) -FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) +FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions() { - return r_input_color_jittered[UNITY_FSR3_POS(iPxPos)].rgb; + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_transparency_and_composition_mask.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) +{ + return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, fUV, 0).x; } #endif #if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) -FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +Texture2D r_input_color_jittered : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR); + +FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) { - return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).rgb; + return r_input_color_jittered[iPxPos].rgb; } -#endif -#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR) -FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) { - return r_prepared_input_color[iPxPos].xyz; + return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; } #endif #if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) +Texture2D r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS); + FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) { - FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[UNITY_FSR3_POS(iPxDilatedMotionVectorPos)].xy; + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; FfxFloat32x2 fUvMotionVector = 
fSrcMotionVector * MotionVectorScale(); @@ -559,13 +433,22 @@ FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) #endif #if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) +Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED); + FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) { return r_internal_upscaled_color[iPxHistory]; } + +FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) +{ + return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0); +} #endif #if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) +RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY); + void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { rw_luma_history[iPxPos] = fLumaHistory; @@ -573,29 +456,41 @@ void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) #endif #if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) +Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY); + +FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos) +{ + return r_luma_history[iPxPos]; +} + FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) { return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); } #endif +#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) +Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT); + FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) { -#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) return r_rcas_input[iPxPos]; -#else - return 0.0; -#endif } +FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV) +{ + return r_rcas_input.SampleLevel(s_LinearClamp, fUV, 0).rgb; +} +#endif + #if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) +RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED); + void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) { rw_internal_upscaled_color[iPxHistory] = fHistory; } -#endif -#if 
defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) { rw_internal_upscaled_color[iPxPos] = fColorAndWeight; @@ -603,86 +498,200 @@ void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeigh #endif #if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) +RWTexture2D rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT); + void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) { - rw_upscaled_output[UNITY_FSR3_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); } #endif -//LOCK_LIFETIME_REMAINING == 0 -//Should make LockInitialLifetime() return a const 1.0f later -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS) -FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) +#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION) +Texture2D r_accumulation : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_ACCUMULATION); + +FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV) { - return r_lock_status[iPxPos]; + return r_accumulation.SampleLevel(s_LinearClamp, fUV, 0); } #endif -#if defined(FSR3UPSCALER_BIND_UAV_LOCK_STATUS) -void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) +#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION) +RWTexture2D rw_accumulation : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_ACCUMULATION); + +void StoreAccumulation(FfxUInt32x2 iPxPos, FfxFloat32 fAccumulation) { - rw_lock_status[iPxPos] = fLockStatus; + rw_accumulation[iPxPos] = fAccumulation; } #endif -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA) -FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) +#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) +Texture2D r_shading_change : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE); + +FfxFloat32 LoadShadingChange(FfxUInt32x2 iPxPos) +{ + return r_shading_change[iPxPos]; +} + +FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV) { - return 
r_lock_input_luma[iPxPos]; + return r_shading_change.SampleLevel(s_LinearClamp, fUV, 0); } #endif -#if defined(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA) -void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) +#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE) +RWTexture2D rw_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE); + +void StoreShadingChange(FfxUInt32x2 iPxPos, FfxFloat32 fShadingChange) { - rw_lock_input_luma[iPxPos] = fLuma; + rw_shading_change[iPxPos] = fShadingChange; } #endif -#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) -FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) +Texture2D r_farthest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH); + +FfxInt32x2 GetFarthestDepthResourceDimensions() { - return r_new_locks[iPxPos]; + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_farthest_depth.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepth(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth[iPxPos]; +} + +FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV) +{ + return r_farthest_depth.SampleLevel(s_LinearClamp, fUV, 0); } #endif -#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) -FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH) +RWTexture2D rw_farthest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH); + +void StoreFarthestDepth(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) { - return rw_new_locks[iPxPos]; + rw_farthest_depth[iPxPos] = fDepth; } #endif -#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) -void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) +Texture2D r_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1); + +FfxInt32x2 GetFarthestDepthMip1ResourceDimensions() { - rw_new_locks[iPxPos] = newLock; + FfxUInt32 uWidth; + FfxUInt32 
uHeight; + r_farthest_depth_mip1.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepthMip1(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth_mip1[iPxPos]; +} + +FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV) +{ + return r_farthest_depth_mip1.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1) +RWTexture2D rw_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1); + +void StoreFarthestDepthMip1(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) +{ + rw_farthest_depth_mip1[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) +Texture2D r_current_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA); + +FfxFloat32 LoadCurrentLuma(FfxUInt32x2 iPxPos) +{ + return r_current_luma[iPxPos]; +} + +FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv) +{ + return r_current_luma.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA) +RWTexture2D rw_current_luma : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA); + +void StoreCurrentLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) +{ + rw_current_luma[iPxPos] = fLuma; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) +Texture2D r_luma_instability : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY); + +FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv) +{ + return r_luma_instability.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY) +RWTexture2D rw_luma_instability : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY); + +void StoreLumaInstability(FfxUInt32x2 iPxPos, FfxFloat32 fLumaInstability) +{ + rw_luma_instability[iPxPos] = fLumaInstability; } #endif -#if defined(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR) -void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 
fTonemapped) +#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) +Texture2D r_previous_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA); + +FfxFloat32 LoadPreviousLuma(FfxUInt32x2 iPxPos) +{ + return r_previous_luma[iPxPos]; +} + +FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv) { - rw_prepared_input_color[iPxPos] = fTonemapped; + return r_previous_luma.SampleLevel(s_LinearClamp, uv, 0); } #endif -#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR) -FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) +Texture2D r_new_locks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS); + +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) { - return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; + return r_new_locks[iPxPos]; } #endif -#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS) -FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) +#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) +RWTexture2D rw_new_locks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS); + +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) +{ + return rw_new_locks[iPxPos]; +} + +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) { - FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); - return fLockStatus; + rw_new_locks[iPxPos] = newLock; } #endif #if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) { return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); @@ -690,19 +699,19 @@ FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) #endif #if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + void 
StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = asuint(fDepth); - #if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); - #else - InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth - #endif -} +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); +#else + InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth #endif +} -#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) { rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; @@ -710,6 +719,8 @@ void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) #endif #if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) +RWTexture2D rw_dilated_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH); + void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { rw_dilated_depth[iPxPos] = fDepth; @@ -717,6 +728,8 @@ void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN Ffx #endif #if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) +RWTexture2D rw_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS); + void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { rw_dilated_motion_vectors[iPxPos] = fMotionVector; @@ -724,32 +737,36 @@ void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER #endif #if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) -FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) -{ - return r_dilated_motion_vectors[iPxInput].xy; -} -#endif +Texture2D r_dilated_motion_vectors : 
FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS); -#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) -FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) +FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) { - return r_previous_dilated_motion_vectors[iPxInput].xy; + return r_dilated_motion_vectors[iPxInput]; } -FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) { - return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; + return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); } #endif #if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) +Texture2D r_dilated_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH); + FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) { return r_dilated_depth[iPxInput]; } + +FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV) +{ + return r_dilated_depth.SampleLevel(s_LinearClamp, fUV, 0); +} #endif #if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) +Texture2D r_input_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE); + FfxFloat32 Exposure() { FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; @@ -762,17 +779,9 @@ FfxFloat32 Exposure() } #endif -#if defined(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE) -FfxFloat32 AutoExposure() -{ - FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; - - if (exposure == 0.0f) { - exposure = 1.0f; - } - - return exposure; -} +// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) +Texture2D r_lanczos_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT); #endif FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) @@ -783,44 +792,38 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) return 0.f; #endif } - -#if defined(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) -FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) -{ - // Stored as a SNORM, 
so make sure to multiply by 2 to retrieve the actual expected range. - return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); -} -#endif +// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT #if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) -FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) -{ - return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); -} -#endif +Texture2D r_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS); -#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) -FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) +FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { - return r_dilated_reactive_masks[iPxPos]; + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); } #endif #if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) -void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) +RWTexture2D rw_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS); + +void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks) { rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; } #endif #if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) +Texture2D r_input_opaque_only : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY); + FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { - return r_input_opaque_only[UNITY_FSR3_POS(iPxPos)].xyz; + return r_input_opaque_only[iPxPos].xyz; } #endif #if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) +Texture2D r_input_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { return r_input_prev_color_pre_alpha[iPxPos]; @@ -828,14 
+831,20 @@ FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) #endif #if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) +Texture2D r_input_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { return r_input_prev_color_post_alpha[iPxPos]; } #endif -#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) -#if defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) +#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \ + defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) + +RWTexture2D rw_output_autoreactive : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE); +RWTexture2D rw_output_autocomposition : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION); + void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) { rw_output_autoreactive[iPxPos] = fReactive.x; @@ -843,86 +852,112 @@ void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FF rw_output_autocomposition[iPxPos] = fReactive.y; } #endif -#endif #if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR); + void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) { rw_output_prev_color_pre_alpha[iPxPos] = color; - } #endif #if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR); + void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) { rw_output_prev_color_post_alpha[iPxPos] = color; } #endif -FfxFloat32x2 SPD_LoadExposureBuffer() +#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO) +RWTexture2D rw_frame_info : 
FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FRAME_INFO); + +FfxFloat32x4 LoadFrameInfo() { -#if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - return rw_auto_exposure[FfxInt32x2(0, 0)]; -#else - return FfxFloat32x2(0.f, 0.f); -#endif // #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE + return rw_frame_info[FfxInt32x2(0, 0)]; } -void SPD_SetExposureBuffer(FfxFloat32x2 value) +void StoreFrameInfo(FfxFloat32x4 fInfo) { -#if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE - rw_auto_exposure[FfxInt32x2(0, 0)] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE + rw_frame_info[FfxInt32x2(0, 0)] = fInfo; } +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +Texture2D r_frame_info : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FRAME_INFO); -FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) +FfxFloat32x4 FrameInfo() { -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); -#else - return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 -} - -void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) -{ - switch (slice) - { - case FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL: -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - rw_img_mip_shading_change[iPxPos] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - break; - case 5: -#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - rw_img_mip_5[iPxPos] = value; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - break; - default: - - // avoid flattened side effect -#if defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) - rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; -#elif defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5) - rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos]; -#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5 - break; - } + return r_frame_info[FfxInt32x2(0, 0)]; } +#endif -void 
SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + +RWTexture2D rw_spd_mip0 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0); +RWTexture2D rw_spd_mip1 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1); +RWTexture2D rw_spd_mip2 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2); +RWTexture2D rw_spd_mip3 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3); +RWTexture2D rw_spd_mip4 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4); +globallycoherent RWTexture2D rw_spd_mip5 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5); + +FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) { +#define LOAD(idx) \ + if (index == idx) \ + { \ + return rw_spd_mip##idx[iPxPos]; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + + return 0; + +#undef LOAD +} + +void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define STORE(idx) \ + if (index == idx) \ + { \ + rw_spd_mip##idx[iPxPos] = outValue; \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + +#undef STORE +} +#endif + #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC +globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC); + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); -#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC } void SPD_ResetAtomicCounter() { -#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 
rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; -#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC } +#endif #endif // #if defined(FFX_GPU) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h index 1f78a29..dd479b1 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,130 +20,131 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- #if !defined(FFX_FSR3UPSCALER_COMMON_H) #define FFX_FSR3UPSCALER_COMMON_H -#if defined(FFX_CPU) || defined(FFX_GPU) -//Locks -#define LOCK_LIFETIME_REMAINING 0 -#define LOCK_TEMPORAL_LUMA 1 -#endif // #if defined(FFX_CPU) || defined(FFX_GPU) - #if defined(FFX_GPU) -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = 1e-03f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = 1.0f / FSR3UPSCALER_FP16_MAX; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MAX = 3.402823466e+38f; -FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MIN = 1.175494351e-38f; - -// treat vector truncation warnings as errors -#pragma warning(error: 3206) +#pragma warning(error : 3206) // treat vector truncation warnings as errors +#pragma warning(disable : 3205) // conversion from larger type to smaller +#pragma warning(disable : 3571) // in ffxPow(f, e), f could be negative -// suppress warnings -#pragma warning(disable: 3205) // conversion from larger type to smaller -#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = FSR3UPSCALER_FP16_MIN; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = FSR3UPSCALER_FP16_MIN; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MAX = 3.402823466e+38f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MIN = 1.175494351e-38f; // Reconstructed depth usage -FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; +FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = FSR3UPSCALER_EPSILON * 10; + +FfxFloat32 ReconstructedDepthMvPxThreshold(FfxFloat32 fNearestDepthInMeters) +{ + return ffxLerp(0.25f, 0.75f, ffxSaturate(fNearestDepthInMeters / 100.0f)); +} // Accumulation -FFX_STATIC const 
FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f; -FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; -FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples -FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; +FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 16.0f; +FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; -// Auto exposure -FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +#define SHADING_CHANGE_SET_SIZE 5 +FFX_STATIC const FfxInt32 iShadingChangeMipStart = 0; +FFX_STATIC const FfxFloat32 fShadingChangeSamplePow = 1.0f / 1.0f; -struct AccumulationPassCommonParams -{ - FfxInt32x2 iPxHrPos; - FfxFloat32x2 fHrUv; - FfxFloat32x2 fLrUv_HwSampler; - FfxFloat32x2 fMotionVector; - FfxFloat32x2 fReprojectedHrUv; - FfxFloat32 fHrVelocity; - FfxFloat32 fDepthClipFactor; - FfxFloat32 fDilatedReactiveFactor; - FfxFloat32 fAccumulationMask; - - FfxBoolean bIsResetFrame; - FfxBoolean bIsExistingSample; - FfxBoolean bIsNewSample; -}; -struct LockState +FFX_STATIC const FfxFloat32 fLockThreshold = 1.0f; +FFX_STATIC const FfxFloat32 fLockMax = 2.0f; + +FFX_STATIC const FfxInt32 REACTIVE = 0; +FFX_STATIC const FfxInt32 DISOCCLUSION = 1; +FFX_STATIC const FfxInt32 SHADING_CHANGE = 2; +FFX_STATIC const FfxInt32 ACCUMULAION = 3; + +FFX_STATIC const FfxInt32 FRAME_INFO_EXPOSURE = 0; +FFX_STATIC const FfxInt32 FRAME_INFO_LOG_LUMA = 1; +FFX_STATIC const FfxInt32 FRAME_INFO_SCENE_AVERAGE_LUMA = 2; + +FfxBoolean TonemapFirstFrame() { - FfxBoolean NewLock; //Set for both unique new and re-locked new - FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) -}; + 
const FfxBoolean bEnabled = true; + return FrameIndex() == 0 && bEnabled; +} -void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) +FfxFloat32 AverageLanczosWeightPerFrame() { - fLockStatus = FfxFloat32x2(0, 0); + return 0.74f; } -#if FFX_HALF -void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) +FfxInt32x2 ShadingChangeRenderSize() { - fLockStatus = FFX_MIN16_F2(0, 0); + return FfxInt32x2(RenderSize() * 0.5f); } -#endif +FfxInt32x2 ShadingChangeMaxRenderSize() +{ + return FfxInt32x2(MaxRenderSize() * 0.5f); +} -void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) +FfxInt32x2 PreviousFrameShadingChangeRenderSize() { - fLockStatus[LOCK_LIFETIME_REMAINING] = 0; + return FfxInt32x2(PreviousFrameRenderSize() * 0.5f); } -#if FFX_HALF -void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +FfxFloat32 SceneAverageLuma() { - fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); + return FrameInfo()[FRAME_INFO_SCENE_AVERAGE_LUMA]; } #endif -struct RectificationBox +// Auto exposure +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; + +struct AccumulationPassCommonParams { - FfxFloat32x3 boxCenter; - FfxFloat32x3 boxVec; - FfxFloat32x3 aabbMin; - FfxFloat32x3 aabbMax; - FfxFloat32 fBoxCenterWeight; -}; -#if FFX_HALF -struct RectificationBoxMin16 -{ - FFX_MIN16_F3 boxCenter; - FFX_MIN16_F3 boxVec; - FFX_MIN16_F3 aabbMin; - FFX_MIN16_F3 aabbMax; - FFX_MIN16_F fBoxCenterWeight; + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUvJittered; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 f4KVelocity; + FfxFloat32 fDisocclusion; + FfxFloat32 fReactiveMask; + FfxFloat32 fShadingChange; + FfxFloat32 fAccumulation; + FfxFloat32 fLumaInstabilityFactor; + FfxFloat32 fFarthestDepthInMeters; + + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; }; -#endif -void 
RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +FfxFloat32 Get4KVelocity(FfxFloat32x2 fMotionVector) { - rectificationBox.fBoxCenterWeight = FfxFloat32(0); - - rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); - rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); - rectificationBox.aabbMin = FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX); - rectificationBox.aabbMax = -FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX); + return length(fMotionVector * FfxFloat32x2(3840.0f, 2160.0f)); } -#if FFX_HALF -void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) + +struct RectificationBox { - rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0); + FfxFloat32x3 boxCenter; + FfxFloat32x3 boxVec; + FfxFloat32x3 aabbMin; + FfxFloat32x3 aabbMax; + FfxFloat32 fBoxCenterWeight; +}; - rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0); - rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0); - rectificationBox.aabbMin = FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX); - rectificationBox.aabbMax = -FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX); -} -#endif +struct AccumulationPassData +{ + RectificationBox clippingBox; + FfxFloat32x3 fUpsampledColor; + FfxFloat32 fUpsampledWeight; + FfxFloat32x3 fHistoryColor; + FfxFloat32 fHistoryWeight; + FfxFloat32 fLock; + FfxFloat32 fLockContributionThisFrame; +}; void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) { @@ -169,63 +171,26 @@ void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT Re rectificationBox.fBoxCenterWeight += fSampleWeight; } } -#if FFX_HALF -void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) -{ - 
rectificationBox.aabbMin = colorSample; - rectificationBox.aabbMax = colorSample; - - FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter = weightedSample; - rectificationBox.boxVec = colorSample * weightedSample; - rectificationBox.fBoxCenterWeight = fSampleWeight; -} - -void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) -{ - if (bInitialSample) { - RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); - } else { - rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); - rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); - - FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; - rectificationBox.boxCenter += weightedSample; - rectificationBox.boxVec += colorSample * weightedSample; - rectificationBox.fBoxCenterWeight += fSampleWeight; - } -} -#endif void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) { - rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_FP32_MIN) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); rectificationBox.boxVec = stdDev; } -#if FFX_HALF -void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) -{ - rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? 
rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f)); - rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; - rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; - FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); - rectificationBox.boxVec = stdDev; -} -#endif FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) { return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); } -#if FFX_HALF -FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v) + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1, const FfxFloat32 fOnZeroReturnValue) { - return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0); + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : fOnZeroReturnValue; } -#endif FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) { @@ -233,14 +198,6 @@ FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) return m != 0 ? ffxMin(v0, v1) / m : 0; } -#if FFX_HALF -FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1) -{ - const FFX_MIN16_F m = ffxMax(v0, v1); - return m != FFX_MIN16_F(0) ? 
ffxMin(v0, v1) / m : FFX_MIN16_F(0); -} -#endif - FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) { FfxFloat32x3 fRgb; @@ -252,19 +209,6 @@ FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) return fRgb; } -#if FFX_HALF -FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) -{ - FFX_MIN16_F3 fRgb; - - fRgb = FFX_MIN16_F3( - fYCoCg.x + fYCoCg.y - fYCoCg.z, - fYCoCg.x + fYCoCg.z, - fYCoCg.x - fYCoCg.y - fYCoCg.z); - - return fRgb; -} -#endif FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) { @@ -277,30 +221,11 @@ FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) return fYCoCg; } -#if FFX_HALF -FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) -{ - FFX_MIN16_F3 fYCoCg; - - fYCoCg = FFX_MIN16_F3( - 0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b, - 0.5 * fRgb.r - 0.5 * fRgb.b, - -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); - - return fYCoCg; -} -#endif FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) { return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); } -#if FFX_HALF -FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb) -{ - return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f)); -} -#endif FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) { @@ -316,22 +241,6 @@ FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) return fPercievedLuminance * 0.01f; } -#if FFX_HALF -FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb) -{ - FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb); - - FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0); - if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) { - fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f); - } - else { - fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f); - } - - return fPercievedLuminance * FFX_MIN16_F(0.01f); -} -#endif FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) { @@ -343,42 +252,18 @@ FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) return fRgb / ffxMax(FSR3UPSCALER_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; } -#if FFX_HALF -FFX_MIN16_F3 
Tonemap(FFX_MIN16_F3 fRgb) -{ - return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx; -} - -FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) +FfxBoolean IsUvInside(FfxFloat32x2 fUv) { - return fRgb / ffxMax(FFX_MIN16_F(FSR3UPSCALER_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; + return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); } -#endif FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) { FfxInt32x2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; - result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; + result.x = ffxMax(0, ffxMin(result.x, iTextureSize.x - 1)); + result.y = ffxMax(0, ffxMin(result.y, iTextureSize.y - 1)); return result; - - // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); } -#if FFX_HALF -FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) -{ - FFX_MIN16_I2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; - result.y = (iPxOffset.y > 0) ? 
ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; - return result; - - // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); -} -#endif FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) { @@ -393,12 +278,6 @@ FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) { return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); } -#if FFX_HALF -FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) -{ - return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size))); -} -#endif FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) { @@ -411,40 +290,16 @@ FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) const FfxFloat32 q = 0.65f; FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); - return 1 / Lmax; + return 1.0f / Lmax; } -#if FFX_HALF -FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg) -{ - Lavg = exp(Lavg); - - const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed - const FFX_MIN16_F K = FFX_MIN16_F(12.5f); - const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K); - - const FFX_MIN16_F q = FFX_MIN16_F(0.65f); - const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100); - - return FFX_MIN16_F(1) / Lmax; -} -#endif FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) { FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); - FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * UpscaleSize(); FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); return iPxHrPos; } -#if FFX_HALF -FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos) -{ - FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter()); - FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize()); - FFX_MIN16_I2 iPxHrPos = 
FFX_MIN16_I2(floor(fLrPosInHr)); - return iPxHrPos; -} -#endif FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) { @@ -491,24 +346,6 @@ FfxFloat32 GetMaxDistanceInMeters() #endif } -FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) -{ - fRgb /= fPreExposure; - fRgb *= fExposure; - - fRgb = clamp(fRgb, 0.0f, FSR3UPSCALER_FP16_MAX); - - return fRgb; -} - -FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) -{ - fRgb /= fExposure; - fRgb *= PreExposure(); - - return fRgb; -} - struct BilinearSamplingData { diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta deleted file mode 100644 index 9fb7653..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: 61bd10363d44ee2478461c9e9efbcb67 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - 
assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h new file mode 100644 index 0000000..6f4fa33 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h @@ -0,0 +1,159 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +struct FfxDebugViewport +{ + FfxInt32x2 offset; + FfxInt32x2 size; +}; + +// Macro to cull and draw debug viewport +#define DRAW_VIEWPORT(function, pos, vp) \ + { \ + if (pointIsInsideViewport(pos, vp)) \ + { \ + function(pos, vp); \ + } \ + } + +FfxFloat32x2 getTransformedUv(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = (FfxFloat32x2(iPxPos - vp.offset) + 0.5f) / vp.size; + + return fUv; +} + +FfxFloat32x3 getMotionVectorColor(FfxFloat32x2 fMotionVector) +{ + return FfxFloat32x3(0.5f + fMotionVector * RenderSize() * 0.5f, 0.5f); +} + +FfxFloat32x4 getUnusedIndicationColor(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxInt32x2 basePos = iPxPos - vp.offset; + + FfxFloat32 ar = FfxFloat32(vp.size.x) / FfxFloat32(vp.size.y); + + return FfxFloat32x4(basePos.x == FfxInt32(basePos.y * ar), 0, 0, 1); +} + +void drawDilatedMotionVectors(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32x2 fMotionVector = SampleDilatedMotionVector(fUv_HW); + + StoreUpscaledOutput(iPxPos, getMotionVectorColor(fMotionVector)); +} + +void drawDisocclusionMask(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fDisocclusionFactor = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[DISOCCLUSION]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fDisocclusionFactor, 0)); +} + +void drawDetailProtectionTakedown(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fProtectionTakedown = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[REACTIVE]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fProtectionTakedown, 0)); +} + +void drawReactiveness(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 
fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fShadingChange = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[SHADING_CHANGE]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fShadingChange, 0)); +} + +void drawProtectedAreas(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32 fProtection = ffxSaturate(SampleHistory(fUv).w - fLockThreshold); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(fProtection, 0, 0)); +} + +void drawDilatedDepthInMeters(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + const FfxFloat32 fDilatedDepth = SampleDilatedDepth(fUv_HW); + const FfxFloat32 fDepthInMeters = GetViewSpaceDepthInMeters(fDilatedDepth); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(ffxSaturate(fDepthInMeters / 25.0f), 0, 0)); +} + +FfxBoolean pointIsInsideViewport(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxInt32x2 extent = vp.offset + vp.size; + + return (iPxPos.x >= vp.offset.x && iPxPos.x < extent.x) && (iPxPos.y >= vp.offset.y && iPxPos.y < extent.y); +} + +void DebugView(FfxInt32x2 iPxPos) +{ +#define VIEWPORT_GRID_SIZE_X 3 +#define VIEWPORT_GRID_SIZE_Y 3 + + FfxFloat32x2 fViewportScale = FfxFloat32x2(1.0f / VIEWPORT_GRID_SIZE_X, 1.0f / VIEWPORT_GRID_SIZE_Y); + FfxInt32x2 iViewportSize = FfxInt32x2(UpscaleSize() * fViewportScale); + + // compute grid [y][x] for easier placement of viewports + FfxDebugViewport vp[VIEWPORT_GRID_SIZE_Y][VIEWPORT_GRID_SIZE_X]; + for (FfxInt32 y = 0; y < VIEWPORT_GRID_SIZE_Y; y++) + { + for (FfxInt32 x = 0; x < VIEWPORT_GRID_SIZE_X; x++) + { + vp[y][x].offset = iViewportSize * FfxInt32x2(x, y); + vp[y][x].size = iViewportSize; + } + } + + // top row + DRAW_VIEWPORT(drawDilatedMotionVectors, iPxPos, vp[0][0]); + DRAW_VIEWPORT(drawProtectedAreas, iPxPos, vp[0][1]); + 
DRAW_VIEWPORT(drawDilatedDepthInMeters, iPxPos, vp[0][2]); + + // bottom row + DRAW_VIEWPORT(drawDisocclusionMask, iPxPos, vp[2][0]); + DRAW_VIEWPORT(drawReactiveness, iPxPos, vp[2][1]); + DRAW_VIEWPORT(drawDetailProtectionTakedown, iPxPos, vp[2][2]); +} diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta new file mode 100644 index 0000000..85dd541 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 182c05fb699007e4cb9010b200259150 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h deleted file mode 100644 index 53763c8..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h +++ /dev/null @@ -1,259 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- - -#ifndef FFX_FSR3UPSCALER_DEPTH_CLIP_H -#define FFX_FSR3UPSCALER_DEPTH_CLIP_H - -FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; - -FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) -{ - FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); - BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); - - FfxFloat32 fDilatedSum = 0.0f; - FfxFloat32 fDepth = 0.0f; - FfxFloat32 fWeightSum = 0.0f; - for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { - - const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; - const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; - - if (IsOnScreen(iSamplePos, RenderSize())) { - const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; - if (fWeight > fReconstructedDepthBilinearWeightThreshold) { - - const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); - const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); - - const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; - - if (fDepthDiff > 0.0f) { - -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); -#else - const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); -#endif - - const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth); - const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); - - const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); - const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); - - const FfxFloat32 Ksep = 1.37e-05f; - const FfxFloat32 Kfov = length(fCorner) / length(fCenter); - const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold; - - const FfxFloat32 fResolutionFactor = 
ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); - const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); - fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; - fWeightSum += fWeight; - } - } - } - } - - return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; -} - -FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) -{ - FfxFloat32 minconvergence = 1.0f; - - FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); - FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); - FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); - - const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; - - if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { - for (FfxInt32 y = -1; y <= 1; ++y) { - for (FfxInt32 x = -1; x <= 1; ++x) { - - FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); - - FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp); - FfxFloat32 fVelocityUv = length(fMotionVector); - - fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); - fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); - minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); - } - } - } - - return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); -} - -FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) -{ - const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); - FfxFloat32 fDepthMax = 0.0f; - FfxFloat32 fDepthMin = fMaxDistInMeters; - - FfxInt32 iMaxDistFound = 0; - - for (FfxInt32 y = -1; y < 2; y++) { - for (FfxInt32 x = -1; x < 2; x++) { - - const FfxInt32x2 iOffset = FfxInt32x2(x, y); - const FfxInt32x2 iSamplePos = iPxPos + iOffset; - - const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 
1.0f : 0.0f; - FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; - - iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); - - fDepthMin = ffxMin(fDepthMin, fDepth); - fDepthMax = ffxMax(fDepthMax, fDepth); - } - } - - return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); -} - -FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) -{ - const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); - - FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; - fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); - FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); - - float fPxDistance = length(fMotionVector * DisplaySize()); - return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; -} - -void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) -{ - // Compensate for bilinear sampling in accumulation pass - - FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz; - FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); - - float fMasksSum = 0.0f; - - FfxFloat32x3 fColorSamples[9]; - FfxFloat32 fReactiveSamples[9]; - FfxFloat32 fTransparencyAndCompositionSamples[9]; - - FFX_UNROLL - for (FfxInt32 y = -1; y < 2; y++) { - FFX_UNROLL - for (FfxInt32 x = -1; x < 2; x++) { - - const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; - - FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz; - FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); - FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); - - fColorSamples[sampleIdx] = fColorSample; - fReactiveSamples[sampleIdx] = 
fReactiveSample; - fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample; - - fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample); - } - } - - if (fMasksSum > 0) - { - for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) - { - FfxFloat32x3 fColorSample = fColorSamples[sampleIdx]; - FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx]; - FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; - - const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); - const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; - - // Increase power for non-similar samples - const FfxFloat32 fPowerBiasMax = 6.0f; - const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); - const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); - const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); - - fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); - } - } - - StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); -} - -FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) -{ - //We assume linear data. if non-linear input (sRGB, ...), - //then we should convert to linear first and back to sRGB on output. 
- FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - - fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); - - const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); - - return fPreparedYCoCg; -} - -FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) -{ - FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); - FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); - FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); - - return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); -} - -void DepthClip(FfxInt32x2 iPxPos) -{ - FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); - FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); - - // Discard tiny mvs - fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); - - const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; - const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); - const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); - - // Compute prepared input color and depth clip - FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); - FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); - StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip)); - - // Compute dilated reactive mask -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxInt32x2 iSamplePos = iPxPos; -#else - FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); -#endif - - FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); - FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos)); - - PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); -} - -#endif //!defined( 
FFX_FSR3UPSCALER_DEPTH_CLIPH ) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta deleted file mode 100644 index 891d3d1..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: 7c662249d70c4434da4f2da00e432c38 -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h deleted file mode 100644 index e1a0d06..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- - -#ifndef FFX_FSR3UPSCALER_LOCK_H -#define FFX_FSR3UPSCALER_LOCK_H - -void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) -{ - if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) - { -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - const FfxUInt32 farZ = 0x0; -#else - const FfxUInt32 farZ = 0x3f800000; -#endif - SetReconstructedDepth(iPxHrPos, farZ); - } -} - -FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) -{ - const FfxInt32 RADIUS = 1; - - FfxFloat32 fNucleus = LoadLockInputLuma(pos); - - FfxFloat32 similar_threshold = 1.05f; - FfxFloat32 dissimilarLumaMin = FSR3UPSCALER_FLT_MAX; - FfxFloat32 dissimilarLumaMax = 0; - - /* - 0 1 2 - 3 4 5 - 6 7 8 - */ - - #define SETBIT(x) (1U << x) - - FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar - - const FfxUInt32 uNumRejectionMasks = 4; - const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { - SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left - SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right - SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left - SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right - }; - - FfxInt32 idx = 0; - FFX_UNROLL - for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) { - FFX_UNROLL - for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) { - if (x == 0 && y == 0) continue; - - FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); - - FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos); - FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); - - if (difference > 0 && (difference < similar_threshold)) { - mask |= SETBIT(idx); - } else { - dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma); - dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma); - } - } - } - - FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin; - - if (FFX_FALSE == isRidge) { - - return false; - } - - FFX_UNROLL - for (FfxInt32 i = 0; i < 4; i++) { - - if ((mask & 
uRejectionMasks[i]) == uRejectionMasks[i]) { - return false; - } - } - - return true; -} - -void ComputeLock(FfxInt32x2 iPxLrPos) -{ - if (ComputeThinFeatureConfidence(iPxLrPos)) - { - StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); - } - - // ClearResourcesForNextFrame(iPxLrPos); -} - -#endif // FFX_FSR3UPSCALER_LOCK_H diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta deleted file mode 100644 index 4013169..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: c7e9f53dd040b2645af5ccd936a94b0e -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h new file mode 100644 index 0000000..624b7ca --- /dev/null +++ 
b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h @@ -0,0 +1,115 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +struct LumaInstabilityFactorData +{ + FfxFloat32x4 fLumaHistory; + FfxFloat32 fLumaInstabilityFactor; +}; + +LumaInstabilityFactorData ComputeLumaInstabilityFactor(LumaInstabilityFactorData data, FfxFloat32 fCurrentFrameLuma, FfxFloat32 fFarthestDepthInMeters) +{ + const FfxInt32 N_MINUS_1 = 0; + const FfxInt32 N_MINUS_2 = 1; + const FfxInt32 N_MINUS_3 = 2; + const FfxInt32 N_MINUS_4 = 3; + + FfxFloat32 fLumaInstability = 0.0f; + const FfxFloat32 fDiffs0 = (fCurrentFrameLuma - data.fLumaHistory[N_MINUS_1]); + const FfxFloat32 fSimilarity0 = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[N_MINUS_1], 1.0f); + + FfxFloat32 fMaxSimilarity = fSimilarity0; + + if (fSimilarity0 < 1.0f) { + for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { + const FfxFloat32 fDiffs1 = (fCurrentFrameLuma - data.fLumaHistory[i]); + const FfxFloat32 fSimilarity1 = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[i]); + + if (sign(fDiffs0) == sign(fDiffs1)) { + + fMaxSimilarity = ffxMax(fMaxSimilarity, fSimilarity1); + } + } + + fLumaInstability = FfxFloat32(fMaxSimilarity > fSimilarity0); + } + + // Shift history + data.fLumaHistory[N_MINUS_4] = data.fLumaHistory[N_MINUS_3]; + data.fLumaHistory[N_MINUS_3] = data.fLumaHistory[N_MINUS_2]; + data.fLumaHistory[N_MINUS_2] = data.fLumaHistory[N_MINUS_1]; + data.fLumaHistory[N_MINUS_1] = fCurrentFrameLuma; + + data.fLumaHistory /= Exposure(); + + data.fLumaInstabilityFactor = fLumaInstability * FfxFloat32(data.fLumaHistory[N_MINUS_4] != 0); + + return data; +} + +void LumaInstability(FfxInt32x2 iPxPos) +{ + LumaInstabilityFactorData data; + data.fLumaInstabilityFactor = 0.0f; + data.fLumaHistory = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxPos); + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize(); + const FfxFloat32x2 fUvCurrFrameJittered = fUv + Jitter() / RenderSize(); + const FfxFloat32x2 fUvPrevFrameJittered = fUv + PreviousFrameJitter() / 
PreviousFrameRenderSize(); + const FfxFloat32x2 fReprojectedUv = fUvPrevFrameJittered + fDilatedMotionVector; + + if (IsUvInside(fReprojectedUv)) + { + const FfxFloat32x2 fUvReactive_HW = ClampUv(fUvCurrFrameJittered, RenderSize(), MaxRenderSize()); + + const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fUvReactive_HW); + const FfxFloat32 fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]); + const FfxFloat32 fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]); + const FfxFloat32 fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]); + const FfxFloat32 fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); + + const FfxBoolean bAccumulationFactor = fAccumulation > 0.9f; + + const FfxBoolean bComputeInstability = bAccumulationFactor; + + if (bComputeInstability) { + + const FfxFloat32x2 fUv_HW = ClampUv(fUvCurrFrameJittered, RenderSize(), MaxRenderSize()); + const FfxFloat32 fCurrentFrameLuma = SampleCurrentLuma(fUv_HW) * Exposure(); + + const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize()); + data.fLumaHistory = SampleLumaHistory(fReprojectedUv_HW) * DeltaPreExposure() * Exposure(); + + const FfxFloat32x2 fFarthestDepthUv_HW = ClampUv(fUvCurrFrameJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); + const FfxFloat32 fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv_HW); + + data = ComputeLumaInstabilityFactor(data, fCurrentFrameLuma, fFarthestDepthInMeters); + + const FfxFloat32 fVelocityWeight = 1.0f - ffxSaturate(Get4KVelocity(fDilatedMotionVector) / 20.0f); + data.fLumaInstabilityFactor *= fVelocityWeight * (1.0f - fDisocclusion) * (1.0f - fReactiveMask) * (1.0f - fShadingChange); + } + } + + StoreLumaHistory(iPxPos, data.fLumaHistory); + StoreLumaInstability(iPxPos, data.fLumaInstabilityFactor); +} diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta 
b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta new file mode 100644 index 0000000..34ed1b5 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: ccf5158f46f46d040b6142a6c7cb1d75 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h similarity index 64% rename from Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h rename to Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h index d26cf23..41c5dc7 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - FFX_GROUPSHARED FfxUInt32 spdCounter; void SpdIncreaseAtomicCounter(FfxUInt32 slice) @@ -43,49 +43,68 @@ FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; -FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) +FFX_STATIC const FfxInt32 LOG_LUMA = 0; +FFX_STATIC const FfxInt32 LUMA = 1; +FFX_STATIC const FfxInt32 DEPTH_IN_METERS = 2; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) { - FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); - fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); - FfxFloat32x3 fRgb = SampleInputColor(fUv); + //We assume linear data. 
if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - fRgb /= PreExposure(); - - //compute log luma - const FfxFloat32 fLogLuma = log(ffxMax(FSR3UPSCALER_EPSILON, RGBToLuma(fRgb))); + const FfxFloat32 fLuma = LoadCurrentLuma(iPxSamplePos); + const FfxFloat32 fLogLuma = ffxMax(FSR3UPSCALER_EPSILON, log(fLuma)); + const FfxFloat32 fFarthestDepthInMeters = LoadFarthestDepth(iPxSamplePos); - // Make sure out of screen pixels contribute no value to the end result - const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[LOG_LUMA] = fLogLuma; + fOutput[LUMA] = fLuma; + fOutput[DEPTH_IN_METERS] = fFarthestDepthInMeters; - return FfxFloat32x4(result, 0, 0, 0); + return fOutput; } FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) { - return SPD_LoadMipmap5(tex); + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; } void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) { - if (index == LumaMipLevelToUse() || index == 5) + if (index == 5) { - SPD_SetMipmap(pix, index, outValue.r); + StorePyramid(pix, outValue.xy, index); + } + else if (index == 0) { + StoreFarthestDepthMip1(pix, outValue[DEPTH_IN_METERS]); } if (index == MipCount() - 1) { //accumulate on 1x1 level if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) { - FfxFloat32 prev = SPD_LoadExposureBuffer().y; - FfxFloat32 result = outValue.r; + FfxFloat32x4 frameInfo = LoadFrameInfo(); + const FfxFloat32 fSceneAvgLuma = outValue[LUMA]; + const FfxFloat32 fPrevLogLuma = frameInfo[FRAME_INFO_LOG_LUMA]; + FfxFloat32 fLogLuma = outValue[LOG_LUMA]; - if (prev < resetAutoExposureAverageSmoothing) // Compare 
Lavg, so small or negative values + if (fPrevLogLuma < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values { - FfxFloat32 rate = 1.0f; - result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + fLogLuma = fPrevLogLuma + (fLogLuma - fPrevLogLuma) * (1.0f - exp(-DeltaTime())); + fLogLuma = ffxMax(0.0f, fLogLuma); } - FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); - SPD_SetExposureBuffer(spdOutput); + + frameInfo[FRAME_INFO_EXPOSURE] = ComputeAutoExposureFromLavg(fLogLuma); + frameInfo[FRAME_INFO_LOG_LUMA] = fLogLuma; + frameInfo[FRAME_INFO_SCENE_AVERAGE_LUMA] = fSceneAvgLuma; + + StoreFrameInfo(frameInfo); } } } @@ -105,10 +124,7 @@ void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) spdIntermediateB[x][y] = value.z; spdIntermediateA[x][y] = value.w; } -FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) -{ - return (v0 + v1 + v2 + v3) * 0.25f; -} + #endif // define fetch and store functions Packed diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta new file mode 100644 index 0000000..0949290 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 3f5a9f1fd09f2e54a978bc20e89ea326 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h 
b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h deleted file mode 100644 index 3709113..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h +++ /dev/null @@ -1,107 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- - -#ifndef FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H -#define FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H - -FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) -{ - return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); -} - -#if FFX_HALF -FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) -{ - return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); -} -#endif - -#if FFX_FSR3UPSCALER_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) -#else -DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) -#endif -DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) - -FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) -{ - FfxFloat32 fShadingChangeLuma = 0; - -#if 0 - fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x); -#else - - const FfxFloat32 fDiv = FfxFloat32(2u << LumaMipLevelToUse()); - FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); - - fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); - fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); -#endif - - fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); - - return fShadingChangeLuma; -} - -void UpdateLockStatus(AccumulationPassCommonParams params, - FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, - FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, - FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, - FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { - - const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); - - //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? 
- fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; - - FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; - - fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); - - if (state.NewLock) { - fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; - - fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; - } - else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { - fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); - } - else { - if (fLuminanceDiff > 0.1f) { - KillLock(fLockStatus); - } - } - - fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); - fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); - - fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); - fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); - - // Compute this frame lock contribution - const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); - const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); - - fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); -} - -#endif //!defined( FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta deleted file mode 100644 index 8c8bf49..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: 67a8b72ceb93d634f883b086fdccb348 
-PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h new file mode 100644 index 0000000..59c765b --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h @@ -0,0 +1,174 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE.
+
+// Projects the current depth sample into the previous frame.
+// fMotionVector is zeroed unless its 4K velocity exceeds the depth-dependent
+// threshold; fDepth is then stored at every on-screen bilinear tap around the
+// reprojected position whose weight exceeds
+// fReconstructedDepthBilinearWeightThreshold.
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector)
+{
+    const FfxFloat32 fNearestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(fDepth), FSR3UPSCALER_FP16_MAX);
+    const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters);
+
+    // Discard small mvs
+    fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold);
+
+    const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / RenderSize();
+    const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+    const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+    // Project current depth into previous frame locations.
+    // Push to all pixels having some contribution if reprojection is using bilinear logic.
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+
+        if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+            const FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
+            if (IsOnScreen(iStorePos, RenderSize())) {
+                StoreReconstructedDepth(iStorePos, fDepth);
+            }
+        }
+    }
+}
+
+// Depth extents of a pixel neighbourhood, filled in by FindDepthExtents().
+// fNearestCoord is an integer pixel coordinate despite its 'f' prefix.
+struct DepthExtents
+{
+    FfxFloat32 fNearest;
+    FfxInt32x2 fNearestCoord;
+    FfxFloat32 fFarthest;
+};
+
+// Scans the 3x3 neighbourhood of iPxPos and returns its nearest depth, the
+// coordinate of that nearest sample and its farthest depth. With
+// FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH "nearest" is the largest value,
+// otherwise the smallest. Neighbours outside RenderSize() are ignored.
+DepthExtents FindDepthExtents(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+{
+    DepthExtents extents;
+    const FfxInt32 iSampleCount = 9;
+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+        FfxInt32x2(+0, +0),
+        FfxInt32x2(+1, +0),
+        FfxInt32x2(+0, +1),
+        FfxInt32x2(+0, -1),
+        FfxInt32x2(-1, +0),
+        FfxInt32x2(-1, +1),
+        FfxInt32x2(+1, +1),
+        FfxInt32x2(-1, -1),
+        FfxInt32x2(+1, -1),
+    };
+
+    // pull out the depth loads to allow SC to batch them
+    FfxFloat32 depth[iSampleCount];
+    FfxInt32 iSampleIndex = 0;
+    FFX_UNROLL
+    for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+
+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        depth[iSampleIndex] = LoadInputDepth(iPos);
+    }
+
+    // find nearest and farthest depth
+    extents.fNearestCoord = iPxPos;
+    extents.fNearest = depth[0];
+    extents.fFarthest = depth[0];
+    FFX_UNROLL
+    for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
+
+        const FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        if (IsOnScreen(iPos, RenderSize())) {
+
+            FfxFloat32 fNdDepth = depth[iSampleIndex];
+            // Track the farthest depth for every on-screen sample. It must not sit
+            // inside the "nearer" branch below: a sample nearer than fNearest can
+            // never be farther than fFarthest, which left it stuck at the centre depth.
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+            extents.fFarthest = ffxMin(extents.fFarthest, fNdDepth);
+            if (fNdDepth > extents.fNearest) {
+#else
+            extents.fFarthest = ffxMax(extents.fFarthest, fNdDepth);
+            if (fNdDepth < extents.fNearest) {
+#endif
+                extents.fNearestCoord = iPos;
+                extents.fNearest = fNdDepth;
+            }
+        }
+    }
+
+    return extents;
+}
+
+// Returns the input motion vector loaded at the neighbourhood's nearest-depth
+// coordinate. Without FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+// that coordinate is first mapped through ComputeHrPosFromLrPos().
+// iPxPos is not used; it is kept so existing callers compile unchanged.
+FfxFloat32x2 DilateMotionVector(FfxInt32x2 iPxPos, const DepthExtents depthExtents)
+{
+#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    const FfxInt32x2 iMotionVectorPos = depthExtents.fNearestCoord;
+#else
+    const FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(depthExtents.fNearestCoord);
+#endif
+
+    const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
+
+    return fDilatedMotionVector;
+}
+
+// Returns RGBToLuma() of the input colour at iPxPos, after clamping negative
+// channels to zero.
+FfxFloat32 GetCurrentFrameLuma(FfxInt32x2 iPxPos)
+{
+    //We assume linear data. if non-linear input (sRGB, ...),
+    //then we should convert to linear first and back to sRGB on output.
+    const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxPos));
+    const FfxFloat32 fLuma = RGBToLuma(fRgb);
+
+    return fLuma;
+}
+
+// Per-pixel entry point of this header: computes the depth extents and dilated
+// motion vector for iPxPos, reconstructs previous depth, then stores the
+// dilated motion vector, nearest depth, farthest depth in metres (capped at
+// FSR3UPSCALER_FP16_MAX) and current luma.
+void PrepareInputs(FfxInt32x2 iPxPos)
+{
+    const DepthExtents depthExtents = FindDepthExtents(iPxPos);
+    const FfxFloat32x2 fDilatedMotionVector = DilateMotionVector(iPxPos, depthExtents);
+
+    ReconstructPrevDepth(iPxPos, depthExtents.fNearest, fDilatedMotionVector);
+
+    StoreDilatedMotionVector(iPxPos, fDilatedMotionVector);
+    StoreDilatedDepth(iPxPos, depthExtents.fNearest);
+
+    const FfxFloat32 fFarthestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(depthExtents.fFarthest), FSR3UPSCALER_FP16_MAX);
+    StoreFarthestDepth(iPxPos, fFarthestDepthInMeters);
+
+    const FfxFloat32 fLuma = GetCurrentFrameLuma(iPxPos);
+    StoreCurrentLuma(iPxPos, fLuma);
+}
diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta new file mode 100644 index 0000000..5c649eb --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 8c3e6a508446fc34590d9b426d75ae30 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} +
defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h new file mode 100644 index 0000000..fa9571d --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h @@ -0,0 +1,302 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+// Estimates disocclusion for the pixel at fUv by comparing its view-space depth
+// with the reconstructed previous depth at the bilinear taps around the
+// reprojected position. Returns 0 when no tap contributes or when, for any
+// contributing tap, the depth difference is not above FSR3UPSCALER_FP32_MIN.
+FfxFloat32 ComputeDisocclusions(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthViewSpace)
+{
+    const FfxFloat32 fNearestDepthInMeters = ffxMin(fCurrentDepthViewSpace * ViewSpaceToMetersFactor(), FSR3UPSCALER_FP16_MAX);
+    const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters);
+
+    fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold);
+
+    const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+    const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+    FfxFloat32 fDisocclusion = 0.0f;
+    FfxFloat32 fWeightSum = 0.0f;
+    FfxBoolean bPotentialDisocclusion = true;
+
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4 && bPotentialDisocclusion; iSampleIndex++)
+    {
+
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = ClampLoad(bilinearInfo.iBasePos, iOffset, FfxInt32x2(RenderSize()));
+
+        if (IsOnScreen(iSamplePos, RenderSize())) {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(LoadReconstructedPrevDepth(iSamplePos));
+                const FfxFloat32 fDepthDifference = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+
+                bPotentialDisocclusion = bPotentialDisocclusion && (fDepthDifference > FSR3UPSCALER_FP32_MIN);
+
+                if (bPotentialDisocclusion) {
+                    const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()) * 0.5f);
+                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+                    const FfxFloat32 Ksep = 1.37e-05f;
+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * fHalfViewportWidth * fDepthThreshold;
+
+                    fDisocclusion += ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDifference)) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+
+    fDisocclusion = (bPotentialDisocclusion && fWeightSum > 0) ? ffxSaturate(1.0f - fDisocclusion / fWeightSum) : 0.0f;
+
+    return fDisocclusion;
+}
+
+// Compares this pixel's motion with the motion stored at the position it
+// reprojects to. The result is the product of (1 - saturated ratio of the
+// reprojected to the own 4K velocity), the min/max ratio of the two depths in
+// metres and the saturated own 4K velocity divided by 10.
+FfxFloat32 ComputeMotionDivergence(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthSample)
+{
+    const FfxInt32x2 iPxReprojectedPos = FfxInt32x2((fUv + fMotionVector) * RenderSize());
+    const FfxFloat32 fReprojectedDepth = LoadDilatedDepth(iPxReprojectedPos);
+    const FfxFloat32x2 fReprojectedMotionVector = LoadDilatedMotionVector(iPxReprojectedPos);
+
+    const FfxFloat32 fReprojectedVelocity = Get4KVelocity(fReprojectedMotionVector);
+    const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector);
+
+    const FfxFloat32 fNucleusDepthInMeters = GetViewSpaceDepthInMeters(fReprojectedDepth);
+    const FfxFloat32 fCurrentDepthInMeters = GetViewSpaceDepthInMeters(fCurrentDepthSample);
+
+    const FfxFloat32 fDistanceFactor = MinDividedByMax(fNucleusDepthInMeters, fCurrentDepthInMeters);
+    const FfxFloat32 fVelocityFactor = ffxSaturate(f4KVelocity / 10.0f);
+    // Clamp the divisor so a zero velocity cannot produce 0/0 or x/0.
+    // fVelocityFactor is 0 in that case, so the ratio then has no influence.
+    const FfxFloat32 fVelocityRatio = ffxSaturate(fReprojectedVelocity / ffxMax(f4KVelocity, FSR3UPSCALER_FP32_MIN));
+    const FfxFloat32 fMotionVectorFieldConfidence = (1.0f - fVelocityRatio) * fDistanceFactor * fVelocityFactor;
+
+    return fMotionVectorFieldConfidence;
+}
+
+// Returns the maximum reactive-mask value in the 3x3 neighbourhood of iPxPos
+// (coordinates go through ClampLoad). fUv is not used.
+FfxFloat32 DilateReactiveMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv)
+{
+    FfxFloat32 fDilatedReactiveMasks = 0.0f;
+
+    FFX_UNROLL
+    for (FfxInt32 y = -1; y <= 1; y++)
+    {
+        FFX_UNROLL
+        for (FfxInt32 x = -1; x <= 1; x++)
+        {
+            const FfxInt32x2 sampleCoord = ClampLoad(iPxPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
+            fDilatedReactiveMasks = ffxMax(fDilatedReactiveMasks, LoadReactiveMask(sampleCoord));
+        }
+    }
+
+    return fDilatedReactiveMasks;
+}
+
+// Samples the transparency-and-composition mask at fUv, clamped via ClampUv()
+// to the mask resource dimensions. Despite the name no neighbourhood is
+// visited here; iPxPos is not used.
+FfxFloat32 DilateTransparencyAndCompositionMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv)
+{
+    const FfxFloat32x2 fUvTransparencyAndCompositionMask = ClampUv(fUv, RenderSize(), GetTransparencyAndCompositionMaskResourceDimensions());
+    return SampleTransparencyAndCompositionMask(fUvTransparencyAndCompositionMask);
+}
+
+// Detects thin (ridge-like) luma features in the 3x3 neighbourhood of iPxPos.
+// Neighbours whose exposed luma differs from the centre by less than 90% of the
+// neighbourhood's luma range count as similar. Returns 0 unless the centre lies
+// outside the luma range of the dissimilar neighbours and no 2x2 corner
+// quadrant is entirely similar; otherwise returns 1 - fLumaMin / fLumaMax.
+FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 iPxPos)
+{
+    /*
+        1 2 3
+        4 0 5
+        6 7 8
+    */
+
+    const FfxInt32 iNucleusIndex = 0;
+    const FfxInt32 iSampleCount = 9;
+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+        FfxInt32x2(+0, +0),
+        FfxInt32x2(-1, -1),
+        FfxInt32x2(+0, -1),
+        FfxInt32x2(+1, -1),
+        FfxInt32x2(-1, +0),
+        FfxInt32x2(+1, +0),
+        FfxInt32x2(-1, +1),
+        FfxInt32x2(+0, +1),
+        FfxInt32x2(+1, +1),
+    };
+
+    FfxFloat32 fSamples[iSampleCount];
+
+    FfxFloat32 fLumaMin = FSR3UPSCALER_FP32_MAX;
+    FfxFloat32 fLumaMax = FSR3UPSCALER_FP32_MIN;
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+        const FfxInt32x2 iPxSamplePos = ClampLoad(iPxPos, iSampleOffsets[iSampleIndex], FfxInt32x2(RenderSize()));
+        fSamples[iSampleIndex] = LoadCurrentLuma(iPxSamplePos) * Exposure();
+
+        fLumaMin = ffxMin(fLumaMin, fSamples[iSampleIndex]);
+        fLumaMax = ffxMax(fLumaMax, fSamples[iSampleIndex]);
+    }
+
+    // A flat neighbourhood has no thin feature. Return before dividing by a zero
+    // luma range rather than relying on NaN comparisons to reject it.
+    const FfxFloat32 fLumaRange = fLumaMax - fLumaMin;
+    if (fLumaRange <= 0.0f)
+    {
+        return 0.0f;
+    }
+
+    const FfxFloat32 fThreshold = 0.9f;
+    FfxFloat32 fDissimilarLumaMin = FSR3UPSCALER_FP32_MAX;
+    FfxFloat32 fDissimilarLumaMax = 0;
+
+#define SETBIT(x) (1U << (x))
+
+    FfxUInt32 uPatternMask = SETBIT(iNucleusIndex); // Flag nucleus as similar
+
+    const FfxUInt32 uNumRejectionMasks = 4;
+    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
+        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(iNucleusIndex), // Upper left
+        SETBIT(2) | SETBIT(3) | SETBIT(5) | SETBIT(iNucleusIndex), // Upper right
+        SETBIT(4) | SETBIT(6) | SETBIT(7) | SETBIT(iNucleusIndex), // Lower left
+        SETBIT(5) | SETBIT(7) | SETBIT(8) | SETBIT(iNucleusIndex)  // Lower right
+    };
+
+    FfxInt32 iBitIndex = 1;
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex, ++iBitIndex) {
+
+        const FfxFloat32 fDifference = abs(fSamples[iSampleIndex] - fSamples[iNucleusIndex]) / fLumaRange;
+
+        if (fDifference < fThreshold)
+        {
+            uPatternMask |= SETBIT(iBitIndex);
+        }
+        else
+        {
+            fDissimilarLumaMin = ffxMin(fDissimilarLumaMin, fSamples[iSampleIndex]);
+            fDissimilarLumaMax = ffxMax(fDissimilarLumaMax, fSamples[iSampleIndex]);
+        }
+    }
+
+    const FfxBoolean bIsRidge = fSamples[iNucleusIndex] > fDissimilarLumaMax || fSamples[iNucleusIndex] < fDissimilarLumaMin;
+
+    if (FFX_FALSE == bIsRidge)
+    {
+        return 0.0f;
+    }
+
+    FFX_UNROLL
+    for (FfxUInt32 i = 0; i < uNumRejectionMasks; i++)
+    {
+        if ((uPatternMask & uRejectionMasks[i]) == uRejectionMasks[i])
+        {
+            return 0.0f;
+        }
+    }
+
+    return 1.0f - fLumaMin / fLumaMax;
+}
+
+// Fetches the accumulation value at the reprojected UV (0 when that UV is
+// outside the frame), lerps it towards 0 by fShadingChange and towards at most
+// 0.25 by fDisocclusion, and zeroes it unless round(value * 100) > 1. Stores
+// saturate(value + 1/3) at iPxPos for the next frame and returns the value.
+FfxFloat32 UpdateAccumulation(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fDisocclusion, FfxFloat32 fShadingChange)
+{
+    const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+    FfxFloat32 fAccumulation = 0.0f;
+
+    if (IsUvInside(fReprojectedUv)) {
+        const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize());
+        fAccumulation = ffxSaturate(SampleAccumulation(fReprojectedUv_HW));
+    }
+
+    fAccumulation = ffxLerp(fAccumulation, 0.0f, fShadingChange);
+    fAccumulation = ffxLerp(fAccumulation, ffxMin(fAccumulation, 0.25f), fDisocclusion);
+
+    fAccumulation *= FfxFloat32(round(fAccumulation * 100.0f) > 1.0f);
+
+    // Update for next frame, normalize to store in unorm
+    const FfxFloat32 fAccumulatedFramesMax = 3.0f;
+    const FfxFloat32 fAccumulatedFramesToStore = ffxSaturate(fAccumulation + (1.0f / fAccumulatedFramesMax));
+    StoreAccumulation(iPxPos, fAccumulatedFramesToStore);
+
+    return fAccumulation;
+}
+
+// Returns the saturated shading-change sample at fUv offset by
+// -Jitter() / RenderSize(), clamped via ClampUv().
+FfxFloat32 ComputeShadingChange(FfxFloat32x2 fUv)
+{
+    // NOTE: Here we re-apply jitter, will be reverted again when sampled in accumulation pass
+    const FfxFloat32x2 fShadingChangeUv = ClampUv(fUv - Jitter() / RenderSize(), ShadingChangeRenderSize(), ShadingChangeMaxRenderSize());
+    const FfxFloat32 fShadingChange = ffxSaturate(SampleShadingChange(fShadingChangeUv));
+
+    return fShadingChange;
+}
+
+// Per-pixel entry point of this header: computes reactiveness, disocclusion,
+// shading change and accumulation for iPxPos, stores them via
+// StoreDilatedReactiveMasks(), and stores a new lock at the position given by
+// ComputeHrPosFromLrPos() when the thin-feature confidence exceeds 0.01.
+void PrepareReactivity(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize();
+    const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
+
+    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
+
+    const FfxFloat32 fDisocclusion = ComputeDisocclusions(fUv, fMotionVector, GetViewSpaceDepth(fDilatedDepth));
+    const FfxFloat32 fShadingChange = ffxMax(DilateReactiveMasks(iPxPos, fUv), ComputeShadingChange(fUv));
+
+    const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(fUv, fMotionVector, fDilatedDepth);
+    const FfxFloat32 fDilatedTransparencyAndComposition = DilateTransparencyAndCompositionMasks(iPxPos, fUv);
+    const FfxFloat32 fFinalReactiveness = ffxMax(fMotionDivergence, fDilatedTransparencyAndComposition);
+
+    const FfxFloat32 fAccumulation = UpdateAccumulation(iPxPos, fUv, fMotionVector, fDisocclusion, fShadingChange);
+
+    FfxFloat32x4 fOutput;
+    fOutput[REACTIVE] = fFinalReactiveness;
+    fOutput[DISOCCLUSION] = fDisocclusion;
+    fOutput[SHADING_CHANGE] = fShadingChange;
+    fOutput[ACCUMULAION] = fAccumulation; // (sic) identifier is declared outside this header
+
+    StoreDilatedReactiveMasks(iPxPos, fOutput);
+
+    const FfxFloat32 fLockStrength = ComputeThinFeatureConfidence(iPxPos);
+    if (fLockStrength > (1.0f / 100.0f))
+    {
+        StoreNewLocks(ComputeHrPosFromLrPos(FfxInt32x2(iPxPos)), fLockStrength);
+    }
+}
diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta new file mode 100644 index 0000000..0f6a17a --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: d3e61797324ec384b96b9c14d17b34d1 +PluginImporter: + externalObjects: {} +
serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h index 77619a5..af63e13 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - #define GROUP_SIZE 8 #define FSR_RCAS_DENOISE 1 @@ -35,7 +35,7 @@ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) { FfxFloat32x4 fColor = LoadRCAS_Input(p); - fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + fColor.rgb *= Exposure(); return fColor; } @@ -48,7 +48,7 @@ void CurrFilter(FFX_MIN16_U2 pos) FfxFloat32x3 c; FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); - c = UnprepareRgb(c, Exposure()); + c /= Exposure(); WriteUpscaledOutput(pos, c); } diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h deleted file mode 100644 index a822dfc..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h +++ /dev/null @@ -1,146 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - - -#ifndef FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H -#define FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H - -void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) -{ - fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); - - FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; - FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; - - BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); - - // Project current depth into previous frame locations. - // Push to all pixels having some contribution if reprojection is using bilinear logic. - for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { - - const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; - FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; - - if (fWeight > fReconstructedDepthBilinearWeightThreshold) { - - FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; - if (IsOnScreen(iStorePos, iPxDepthSize)) { - StoreReconstructedDepth(iStorePos, fDepth); - } - } - } -} - -void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) -{ - const FfxInt32 iSampleCount = 9; - const FfxInt32x2 iSampleOffsets[iSampleCount] = { - FfxInt32x2(+0, +0), - FfxInt32x2(+1, +0), - FfxInt32x2(+0, +1), - FfxInt32x2(+0, -1), - FfxInt32x2(-1, +0), - FfxInt32x2(-1, +1), - FfxInt32x2(+1, +1), - FfxInt32x2(-1, -1), - FfxInt32x2(+1, -1), - }; - - // pull out the depth loads to allow SC to batch them - FfxFloat32 depth[9]; - FfxInt32 
iSampleIndex = 0; - FFX_UNROLL - for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { - - FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; - depth[iSampleIndex] = LoadInputDepth(iPos); - } - - // find closest depth - fNearestDepthCoord = iPxPos; - fNearestDepth = depth[0]; - FFX_UNROLL - for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { - - FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; - if (IsOnScreen(iPos, iPxSize)) { - - FfxFloat32 fNdDepth = depth[iSampleIndex]; -#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH - if (fNdDepth > fNearestDepth) { -#else - if (fNdDepth < fNearestDepth) { -#endif - fNearestDepthCoord = iPos; - fNearestDepth = fNdDepth; - } - } - } -} - -FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) -{ - //We assume linear data. if non-linear input (sRGB, ...), - //then we should convert to linear first and back to sRGB on output. - FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); - - // Use internal auto exposure for locking logic - fRgb /= PreExposure(); - fRgb *= Exposure(); - -#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT - fRgb = Tonemap(fRgb); -#endif - - //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! 
- const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); - - return fLockInputLuma; -} - -void ReconstructAndDilate(FfxInt32x2 iPxLrPos) -{ - FfxFloat32 fDilatedDepth; - FfxInt32x2 iNearestDepthCoord; - - FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord); - -#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxInt32x2 iSamplePos = iPxLrPos; - FfxInt32x2 iMotionVectorPos = iNearestDepthCoord; -#else - FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos); - FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord); -#endif - - FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); - - StoreDilatedDepth(iPxLrPos, fDilatedDepth); - StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector); - - ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); - - FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); - StoreLockInputLuma(iPxLrPos, fLockInputLuma); -} - - -#endif //!defined( FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta deleted file mode 100644 index a1fd018..0000000 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta +++ /dev/null @@ -1,65 +0,0 @@ -fileFormatVersion: 2 -guid: c8b3854bad30a8b40babc5a9805f294e -PluginImporter: - externalObjects: {} - serializedVersion: 2 - iconMap: {} - executionOrder: {} - defineConstraints: [] - isPreloaded: 0 - isOverridable: 0 - isExplicitlyReferenced: 0 - validateReferences: 1 - platformData: - - first: - : Any - second: - enabled: 0 - settings: - Exclude Editor: 1 - Exclude GameCoreScarlett: 1 - Exclude GameCoreXboxOne: 1 - Exclude Linux64: 1 - Exclude 
OSXUniversal: 1 - Exclude PS4: 1 - Exclude PS5: 1 - Exclude Win: 1 - Exclude Win64: 1 - - first: - Any: - second: - enabled: 0 - settings: {} - - first: - Editor: Editor - second: - enabled: 0 - settings: - DefaultValueInitialized: true - - first: - Standalone: Linux64 - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: OSXUniversal - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win - second: - enabled: 0 - settings: - CPU: None - - first: - Standalone: Win64 - second: - enabled: 0 - settings: - CPU: None - userData: - assetBundleName: - assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h index 29b7584..45812a6 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
-// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,10 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - -#ifndef FFX_FSR3UPSCALER_REPROJECT_H -#define FFX_FSR3UPSCALER_REPROJECT_H - #ifndef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE #define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference #endif @@ -32,106 +29,36 @@ FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) return LoadHistory(iPxSample); } -#if FFX_HALF -FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample) -{ - return FFX_MIN16_F4(LoadHistory(iPxSample)); -} -#endif - - -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory) -DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) -#else DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) -#endif - -FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) -{ - FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f); - return fSample; -} - -#if FFX_HALF -FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample) -{ - FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0); - - return fSample; -} -#endif - -#if 1 -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples) -#else -DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, 
WrapLockStatus) -DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples) -#endif -#else -#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF -DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) -#else -DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus) -DeclareCustomTextureSample(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) -#endif -#endif FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) { #if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS - FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); #else - FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); + const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); #endif return fDilatedMotionVector; } -FfxBoolean IsUvInside(FfxFloat32x2 fUv) -{ - return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); -} - -void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +void ComputeReprojectedUVs(FFX_PARAMETER_INOUT AccumulationPassCommonParams params) { - fReprojectedHrUv = params.fHrUv + params.fMotionVector; + params.fReprojectedHrUv = params.fHrUv + params.fMotionVector; - bIsExistingSample = IsUvInside(fReprojectedHrUv); + params.bIsExistingSample = IsUvInside(params.fReprojectedHrUv); } -void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 
fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame) +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize()); + const FfxFloat32x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, PreviousFrameUpscaleSize()); - fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); + data.fHistoryColor = fReprojectedHistory.rgb; + data.fHistoryColor *= DeltaPreExposure(); + data.fHistoryColor *= Exposure(); - fHistoryColor = RGBToYCoCg(fHistoryColor); + data.fHistoryColor = RGBToYCoCg(data.fHistoryColor); - //Compute temporal reactivity info - fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); - bInMotionLastFrame = (fHistory.w < 0.0f); + data.fLock = fReprojectedHistory.w; } - -LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) -{ - LockState state = { FFX_FALSE, FFX_FALSE }; - const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); - state.NewLock = fNewLockIntensity > (127.0f / 255.0f); - - FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0; - - fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv); - - if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) { - state.WasLockedPrevFrame = true; - } - - return state; -} - -#endif //!defined( FFX_FSR3UPSCALER_REPROJECT_H ) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h index d98cfcc..b3d8ddb 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- #ifndef FFX_FSR3UPSCALER_RESOURCES_H #define FFX_FSR3UPSCALER_RESOURCES_H @@ -36,63 +36,59 @@ #define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 #define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 #define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS 12 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION 12 #define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS 13 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT 20 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 -#define 
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED 45 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT 19 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1 20 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2 21 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 26 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS 27 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 +#define 
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 27 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_1 28 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_2 29 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_3 30 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_4 31 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5 32 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 33 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO 34 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE 35 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED 36 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 37 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 38 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1 40 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2 41 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE 42 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH 43 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1 44 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA 45 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA 46 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY 48 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1 49 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 53 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 
54 -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 55 // Shading change detection mip level setting, value must be in the range [FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 -#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) +//#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +//#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) -#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT 56 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT 60 -#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER 0 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER 0 #define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD 1 #define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS 2 #define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_COUNT 4 #define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 #define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h index d33f70c..5f727b1 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - #ifndef FFX_FSR3UPSCALER_SAMPLE_H #define FFX_FSR3UPSCALER_SAMPLE_H @@ -495,20 +495,16 @@ FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPx FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) { FfxInt32x2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; - result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; - result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; - result.y = (iPxOffset.y > 0) ? 
ffxMin(result.y, iTextureSize.y - 1) : result.y; + result.x = ffxMax(1, ffxMin(result.x, iTextureSize.x - 2)); + result.y = ffxMax(1, ffxMin(result.y, iTextureSize.y - 2)); return result; } #if FFX_HALF FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) { FFX_MIN16_I2 result = iPxSample + iPxOffset; - result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; - result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; - result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; - result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; + result.x = ffxMax(FFX_MIN16_I(1), ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(2))); + result.y = ffxMax(FFX_MIN16_I(1), ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(2))); return result; } #endif //FFX_HALF @@ -571,12 +567,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ /* Clamp base coords */ \ - fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ - fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x-1), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y-1), fPxSample.y)); \ /* */ \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ } @@ -585,12 +581,12 @@ FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN1 FFX_MIN16_F4 
Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ { \ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ /* Clamp base coords */ \ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ /* */ \ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ - FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ return fColorXY; \ } diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h new file mode 100644 index 0000000..2eb23aa --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h @@ -0,0 +1,68 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_STATIC const FfxInt32 s_MipLevelsToUse = 3; + +struct ShadingChangeLumaInfo +{ + FfxFloat32 fSamples[s_MipLevelsToUse]; +}; + +ShadingChangeLumaInfo ComputeShadingChangeLuma(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, const FfxInt32x2 iCurrentSize) +{ + ShadingChangeLumaInfo info; + + const FfxFloat32x2 fMipUv = ClampUv(fUv, ShadingChangeRenderSize(), GetSPDMipDimensions(0)); + + FFX_UNROLL + for (FfxInt32 iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) { + + const FfxFloat32x2 fSample = SampleSPDMipLevel(fMipUv, iMipLevel); + + info.fSamples[iMipLevel] = abs(fSample.x * fSample.y); + } + + return info; +} + +void ShadingChange(FfxInt32x2 iPxPos) +{ + if (IsOnScreen(FfxInt32x2(iPxPos), ShadingChangeRenderSize())) { + + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / ShadingChangeRenderSize(); + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + + const ShadingChangeLumaInfo info = ComputeShadingChangeLuma(iPxPos, fUvJittered, ShadingChangeRenderSize()); + + const FfxFloat32 fScale = 1.0f + iShadingChangeMipStart / s_MipLevelsToUse; + FfxFloat32 fShadingChange = 0.0f; + FFX_UNROLL + for (int iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) + { + if (info.fSamples[iMipLevel] > 0) { + fShadingChange = ffxMax(fShadingChange, info.fSamples[iMipLevel]) * fScale; + } + } + + StoreShadingChange(iPxPos, ffxSaturate(fShadingChange)); + } +} diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta new file mode 100644 index 0000000..128ba08 --- /dev/null +++ 
b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: c473fdf032dbf5142a97d6c5c40ebb12 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h new file mode 100644 index 0000000..63ca5d6 --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h @@ -0,0 +1,297 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FFX_STATIC const FfxInt32 DIFFERENCE = 0; +FFX_STATIC const FfxInt32 SIGN_SUM = 1; +FFX_STATIC const FfxInt32 MIP0_INDICATOR = 2; + +FfxFloat32x2 Sort2(FfxFloat32x2 v) +{ + return FfxFloat32x2(ffxMin(v.x, v.y), ffxMax(v.x, v.y)); +} + +struct SampleSet +{ + FfxFloat32 fSamples[SHADING_CHANGE_SET_SIZE]; +}; + +#define CompareSwap(i, j) \ +{ \ +FfxFloat32 fTmp = ffxMin(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[j] = ffxMax(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[i] = fTmp;\ +} + +#if SHADING_CHANGE_SET_SIZE == 5 +FFX_STATIC const FfxInt32x2 iSampleOffsets[5] = {FfxInt32x2(+0, +0), FfxInt32x2(-1, +0), FfxInt32x2(+1, +0), FfxInt32x2(+0, -1), FfxInt32x2(+0, +1)}; + +void SortSet(FFX_PARAMETER_INOUT SampleSet fSet) +{ + CompareSwap(0, 3); + CompareSwap(1, 4); + CompareSwap(0, 2); + CompareSwap(1, 3); + CompareSwap(0, 1); + CompareSwap(2, 4); + CompareSwap(1, 2); + CompareSwap(3, 4); + CompareSwap(2, 3); +} +#endif + +FfxFloat32 ComputeMinimumDifference(FfxInt32x2 iPxPos, SampleSet fSet0, SampleSet fSet1) +{ + FfxFloat32 fMinDiff = FSR3UPSCALER_FP16_MAX - 1; + FfxInt32 a = 0; + FfxInt32 b = 0; + + SortSet(fSet0); + SortSet(fSet1); + + const FfxFloat32 fMax = 
ffxMin(fSet0.fSamples[SHADING_CHANGE_SET_SIZE-1], fSet1.fSamples[SHADING_CHANGE_SET_SIZE-1]); + + if (fMax > FSR3UPSCALER_FP32_MIN) { + + FFX_UNROLL + for (FfxInt32 i = 0; i < SHADING_CHANGE_SET_SIZE && (fMinDiff < FSR3UPSCALER_FP16_MAX); i++) { + + FfxFloat32 fDiff = fSet0.fSamples[a] - fSet1.fSamples[b]; + + if (abs(fDiff) > FSR3UPSCALER_FP16_MIN) { + + fDiff = sign(fDiff) * (1.0f - MinDividedByMax(fSet0.fSamples[a], fSet1.fSamples[b])); + + fMinDiff = (abs(fDiff) < abs(fMinDiff)) ? fDiff : fMinDiff; + + a += FfxInt32(fSet0.fSamples[a] < fSet1.fSamples[b]); + b += FfxInt32(fSet0.fSamples[a] >= fSet1.fSamples[b]); + } + else + { + fMinDiff = FSR3UPSCALER_FP16_MAX; + } + } + } + + return fMinDiff * FfxFloat32(fMinDiff < (FSR3UPSCALER_FP16_MAX - 1)); +} + +SampleSet GetCurrentLumaBilinearSamples(FfxFloat32x2 fUv) +{ + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + const FfxInt32x2 iBasePos = FfxInt32x2(floor(fUvJittered * RenderSize())); + + SampleSet fSet; + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], RenderSize()); + fSet.fSamples[iSampleIndex] = LoadCurrentLuma(iSamplePos) * Exposure(); + fSet.fSamples[iSampleIndex] = ffxPow(fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + fSet.fSamples[iSampleIndex] = ffxMax(fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + + return fSet; +} + +struct PreviousLumaBilinearSamplesData +{ + SampleSet fSet; + FfxBoolean bIsExistingSample; +}; + +PreviousLumaBilinearSamplesData GetPreviousLumaBilinearSamples(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + PreviousLumaBilinearSamplesData data; + + const FfxFloat32x2 fUvJittered = fUv + PreviousFrameJitter() / PreviousFrameRenderSize(); + const FfxFloat32x2 fReprojectedUv = fUvJittered + fMotionVector; + + data.bIsExistingSample = IsUvInside(fReprojectedUv); + + if (data.bIsExistingSample) { + + const FfxInt32x2 iBasePos 
= FfxInt32x2(floor(fReprojectedUv * PreviousFrameRenderSize())); + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], PreviousFrameRenderSize()); + data.fSet.fSamples[iSampleIndex] = LoadPreviousLuma(iSamplePos) * DeltaPreExposure() * Exposure(); + data.fSet.fSamples[iSampleIndex] = ffxPow(data.fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + data.fSet.fSamples[iSampleIndex] = ffxMax(data.fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + } + + return data; +} + +FfxFloat32 ComputeDiff(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 fMinDiff = 0.0f; + + const SampleSet fCurrentSamples = GetCurrentLumaBilinearSamples(fUv); + const PreviousLumaBilinearSamplesData previousData = GetPreviousLumaBilinearSamples(fUv, fMotionVector); + + if (previousData.bIsExistingSample) { + fMinDiff = ComputeMinimumDifference(iPxPos, fCurrentSamples, previousData.fSet); + } + + return fMinDiff; +} + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) +{ + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxSamplePos); + const FfxFloat32x2 fUv = (iPxSamplePos + 0.5f) / RenderSize(); + + const FfxFloat32 fScaledAndSignedLumaDiff = ComputeDiff(iPxSamplePos, fUv, fDilatedMotionVector); + + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[DIFFERENCE] = fScaledAndSignedLumaDiff; + fOutput[SIGN_SUM] = (fScaledAndSignedLumaDiff != 0.0f) ? 
sign(fScaledAndSignedLumaDiff) : 0.0f; + fOutput[MIP0_INDICATOR] = 1.0f; + + return fOutput; +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index >= iShadingChangeMipStart) + { + StorePyramid(pix, outValue.xy, index); + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +#endif + +// define fetch and store functions Packed +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "spd/ffx_spd.h" + +void 
ComputeShadingChangePyramid(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta new file mode 100644 index 0000000..83ba87c --- /dev/null +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 26c560eb3a1c18645ab5a44c238e39f8 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h index 47e7ccf..2d587f0 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,22 +20,10 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- -#ifndef FFX_FSR3UPSCALER_UPSAMPLE_H -#define FFX_FSR3UPSCALER_UPSAMPLE_H - -FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; - void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) { fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); } -#if FFX_HALF -void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) -{ - fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); -} -#endif #ifndef FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE #define FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate @@ -55,52 +44,29 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fK return fSampleWeight; } -#if FFX_HALF -FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) -{ - FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; -#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE - FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT - FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE - FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +FfxFloat32 ComputeMaxKernelWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - // To Test: Save reciproqual sqrt compute - // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#else -#error "Invalid Lanczos type" -#endif - return fSampleWeight; -} -#endif + const FfxFloat32 fKernelSizeBias = 1.0f + (1.0f / FfxFloat32x2(DownscaleFactor()) - 1.0f).x; -FfxFloat32 ComputeMaxKernelWeight() { - const FfxFloat32 fKernelSizeBias = 1.0f; + return 
ffxMin(FfxFloat32(1.99f), fKernelSizeBias); +} - FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); +FfxFloat32x3 LoadPreparedColor(FfxInt32x2 iSamplePos) +{ + const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iSamplePos)) * Exposure(); + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); - return ffxMin(FfxFloat32(1.99f), fKernelWeight); + return fPreparedYCoCg; } -FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, - FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) +void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_begin.h" - #endif // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) - FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position - FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position - FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... 
- - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_end.h" - #endif - - FfxFloat32x3 fSamples[iLanczos2SampleCount]; - - FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + const FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); + const FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); + const FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); + const FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + const FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); FfxInt32x2 offsetTL; offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); @@ -112,84 +78,107 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + const FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); - FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); + const FfxBoolean bIsInitialSample = (params.fAccumulation == 0.0f); + + FfxFloat32x3 fSamples[9]; + FfxInt32 iSampleIndex = 0; FFX_UNROLL for (FfxInt32 row = 0; row < 3; row++) { - FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 2); - - FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? 
(3 - row) : row); - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + for (FfxInt32 col = 0; col < 3; col++) { + const FfxInt32x2 iSampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + iSampleColRow; + const FfxInt32x2 iSampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + fSamples[iSampleIndex] = LoadPreparedColor(iSampleCoord); - fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); - } + ++iSampleIndex; + } } - FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); - - FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + if (bIsInitialSample) + { + for (iSampleIndex = 0; iSampleIndex < 9; ++iSampleIndex) + { + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + fSamples[iSampleIndex] = RGBToYCoCg(Tonemap(YCoCgToRGB(fSamples[iSampleIndex]))); + } + } +#endif // Identify how much of each upsampled color to be used for this frame - const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); - const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight(params, data); + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + + const FfxFloat32 fKernelBiasWeight = + ffxMin(1.0f - params.fDisocclusion * 0.5f, + ffxMin(1.0f - params.fShadingChange, + ffxSaturate(data.fHistoryWeight * 5.0f) + )); - const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); - const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); - const 
FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); + const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMin, fKernelBiasMax, fKernelBiasWeight); + - const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); + iSampleIndex = 0; FFX_UNROLL - for (FfxInt32 row = 0; row < 3; row++) { + for (FfxInt32 row = 0; row < 3; row++) + { FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 2); - - const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); - FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; - - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + for (FfxInt32 col = 0; col < 3; col++) + { + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); + const FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); - FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); - fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); + if (!bIsInitialSample) + { + const FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + data.fUpsampledColor += fSamples[iSampleIndex] * fSampleWeight; + data.fUpsampledWeight += fSampleWeight; + } // Update rectification box { + const FfxFloat32 fRectificationCurveBias = -2.3f; const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); - const FfxFloat32 
fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenFactor; const FfxBoolean bInitialSample = (row == 0) && (col == 0); - RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + RectificationBoxAddSample(bInitialSample, data.clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); } + ++iSampleIndex; } } - RectificationBoxComputeVarianceBoxData(clippingBox); + RectificationBoxComputeVarianceBoxData(data.clippingBox); - fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR3UPSCALER_EPSILON); + data.fUpsampledWeight *= FfxFloat32(data.fUpsampledWeight > FSR3UPSCALER_EPSILON); - if (fColorAndWeight.w > FSR3UPSCALER_EPSILON) { + if (data.fUpsampledWeight > FSR3UPSCALER_EPSILON) { // Normalize for deringing (we need to compare colors) - fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; - fColorAndWeight.w *= fUpsampleLanczosWeightScale; + data.fUpsampledColor = data.fUpsampledColor / data.fUpsampledWeight; + data.fUpsampledWeight *= fAverageLanczosWeightPerFrame; - Deringing(clippingBox, fColorAndWeight.xyz); + Deringing(data.clippingBox, data.fUpsampledColor); } - #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr3upscaler_force16_end.h" - #endif - - return fColorAndWeight; + // Initial samples using tonemapped upsampling + if (bIsInitialSample) { +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + data.fUpsampledColor = RGBToYCoCg(InverseTonemap(YCoCgToRGB(data.clippingBox.boxCenter))); +#else + data.fUpsampledColor = data.clippingBox.boxCenter; +#endif + data.fUpsampledWeight = 1.0f; + data.fHistoryWeight = 0.0f; + } } - -#endif //!defined( FFX_FSR3UPSCALER_UPSAMPLE_H ) diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h index e780995..82ebf21 100644 --- 
a/Assets/Shaders/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. - /// @defgroup FfxGPUFsr1 FidelityFX FSR1 /// FidelityFX Super Resolution 1 GPU documentation /// @@ -384,7 +384,7 @@ void ffxFsrEasuFloat( fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n // Normalize and dering. 
- pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW)))); + pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(ffxReciprocal(aW)))); } #endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) @@ -459,7 +459,7 @@ void FsrEasuSetH( FfxFloat16x2 dirX = lD - lB; dirPX += dirX * w; - lenX = FfxFloat16x2(ffxSaturate(abs(dirX) * lenX)); + lenX = ffxSaturate(abs(dirX) * lenX); lenX *= lenX; lenP += lenX * w; FfxFloat16x2 ec = lE - lC; @@ -468,7 +468,7 @@ void FsrEasuSetH( lenY = ffxReciprocalHalf(lenY); FfxFloat16x2 dirY = lE - lA; dirPY += dirY * w; - lenY = FfxFloat16x2(ffxSaturate(abs(dirY) * lenY)); + lenY = ffxSaturate(abs(dirY) * lenY); lenY *= lenY; lenP += lenY * w; } @@ -666,7 +666,7 @@ void FsrEasuH( sharpness = exp2(-sharpness); FfxFloat32x2 hSharp = {sharpness, sharpness}; con[0] = ffxAsUInt32(sharpness); - con[1] = packHalf2x16(hSharp); + con[1] = ffxPackHalf2x16(hSharp); con[2] = 0; con[3] = 0; } @@ -748,12 +748,12 @@ void FsrEasuH( // Immediate constants for peak range. FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); // Limiters, these need to be high precision RCPs. 
- FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R); - FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G); - FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B); - FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y); - FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y); - FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y); + FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R); + FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G); + FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B); + FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); + FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); + FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); FfxFloat32 lobeR = max(-hitMinR, hitMaxR); FfxFloat32 lobeG = max(-hitMinG, hitMaxG); FfxFloat32 lobeB = max(-hitMinB, hitMaxB); @@ -836,7 +836,7 @@ void FsrEasuH( FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG); // Noise detection. FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL; - nz=FfxFloat16(ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)))); + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0); // Min and max of ring. 
FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR); @@ -1052,10 +1052,10 @@ void FsrEasuH( #if defined(FFX_GPU) void FsrSrtmF(inout FfxFloat32x3 c) { - c *= ffxBroadcast3(rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); + c *= ffxBroadcast3(ffxReciprocal(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); } // The extra max solves the c=1.0 case (which is a /0). - void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(rcp(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} + void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(ffxReciprocal(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} #endif //============================================================================================================================== #if defined(FFX_GPU )&& FFX_HALF == 1 @@ -1177,7 +1177,7 @@ void FsrEasuH( FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); b = b * b; FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); - c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0))); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)); } //------------------------------------------------------------------------------------------------------------------------------ void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit) @@ -1188,7 +1188,7 @@ void FsrEasuH( FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); b = b * b; FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); - c = FfxFloat16x3(ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0))); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0)); } //============================================================================================================================== // This 
computes dither for positions 'p' and 'p+{8,0}'. @@ -1224,9 +1224,9 @@ void FsrEasuH( FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG); FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB); - cR = FfxFloat16x2(ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); - cG = FfxFloat16x2(ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); - cB = FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0))); + cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); } //------------------------------------------------------------------------------------------------------------------------------ void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ @@ -1245,8 +1245,8 @@ void FsrEasuH( FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG); FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB); - cR=FfxFloat16x2(ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0))); - cG=FfxFloat16x2(ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0))); - cB=FfxFloat16x2(ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0))); + cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * 
FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0)); } #endif diff --git a/Assets/Shaders/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h b/Assets/Shaders/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h index 6441419..c3ee50f 100644 --- a/Assets/Shaders/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h +++ b/Assets/Shaders/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h @@ -1,16 +1,17 @@ // This file is part of the FidelityFX SDK. -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. -// +// // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -19,7 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- /// @defgroup FfxGPUSpd FidelityFX SPD /// FidelityFX Single Pass Downsampler 2.0 GPU documentation /// @@ -119,10 +119,13 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl #endif // #if FFX_SPD_PACKED_ONLY //_____________________________________________________________/\_______________________________________________________________ +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) +#extension GL_KHR_shader_subgroup_quad:require +#endif void ffxSpdWorkgroupShuffleBarrier() { - FFX_GROUP_MEMORY_BARRIER(); + FFX_GROUP_MEMORY_BARRIER; } // Only last active workgroup should proceed @@ -152,11 +155,10 @@ FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) #elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat32x4 v0 = v; - FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3); + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = QuadReadAcrossX(v); + FfxFloat32x4 v2 = QuadReadAcrossY(v); + FfxFloat32x4 v3 = QuadReadAcrossDiagonal(v); return SpdReduce4(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics @@ -576,6 +578,10 @@ void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxU #if FFX_HALF +#if defined(FFX_GLSL) +#extension GL_EXT_shader_subgroup_extended_types_float16:require +#endif + FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) { #if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) @@ -586,11 +592,10 @@ FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) return SpdReduce4H(v0, v1, v2, v3); #elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat16x4 v0 = v; - FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3); + FfxFloat16x4 v0 = 
v; + FfxFloat16x4 v1 = QuadReadAcrossX(v); + FfxFloat16x4 v2 = QuadReadAcrossY(v); + FfxFloat16x4 v3 = QuadReadAcrossDiagonal(v); return SpdReduce4H(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics @@ -735,7 +740,7 @@ void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupI if (mips <= 1) return; - for (FfxUInt32 i = 0; i < 4; i++) + for (FfxInt32 i = 0; i < 4; i++) { SpdStoreIntermediateH(x, y, v[i]); ffxSpdWorkgroupShuffleBarrier();