From 7a259062d841c39a9c0670e803f9b8d26727fb06 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 20 Dec 2024 18:22:15 +0100 Subject: [PATCH 01/88] Working SGSR2 3-pass: - Added padding field to fix buffer data misalignment issue - Simplified the translation of GLSL globalInvocationID to HLSL --- .../PostProcessing/Runtime/Effects/Upscaling.cs | 3 ++- .../Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 1 + .../SGSR2/Shaders/shaders/sgsr2_activate.hlsl | 9 +++++---- .../SGSR2/Shaders/shaders/sgsr2_convert.hlsl | 8 ++++---- .../SGSR2/Shaders/shaders/sgsr2_upscale.hlsl | 11 +++++------ 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index ed98f22..235e09f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -16,7 +16,7 @@ namespace UnityEngine.Rendering.PostProcessing [InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2, [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3, //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR, - //[InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2, + [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2, [InspectorName("PlayStation Spectral Super Resolution (PSSR)")] PSSR, } @@ -180,6 +180,7 @@ namespace UnityEngine.Rendering.PostProcessing { UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(), UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(), + UpscalerType.SGSR2 when SGSR2Upscaler.IsSupported => new SGSR2Upscaler(), _ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware }; diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index 00f5f8c..d6c658d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -13,6 +13,7 @@ public class SGSR2 public Vector2 renderSizeRcp; public Vector2 displaySizeRcp; public Vector2 jitterOffset; + public Vector2 padding1; public Matrix4x4 clipToPrevClip; public float preExposure; public float cameraFovAngleHor; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl index 5185310..20b8b05 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl @@ -36,6 +36,7 @@ cbuffer Params : register(b0) float2 ViewportSizeInverse; float2 displaySizeRcp; float2 jitterOffset; + float2 padding1; float4 clipToPrevClip[4]; float preExposure; float cameraFovAngleHor; @@ -49,7 +50,7 @@ SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); [numthreads(8, 8, 1)] -void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) +void CS(uint3 globalInvocationID : SV_DispatchThreadID) { int2 sampleOffset[4] = { int2(-1, -1), @@ -58,14 +59,14 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) int2(+0, +0) }; - uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId; + uint2 InputPos = globalInvocationID.xy; - float2 ViewportUV = (float2(InputPos) + 0.5f) * 
ViewportSizeInverse; + float2 ViewportUV = (float2(globalInvocationID.xy) + 0.5f) * ViewportSizeInverse; float2 gatherCoord = ViewportUV + 0.5f * ViewportSizeInverse; uint luma_reference32 = YCoCgColor.GatherRed(s_PointClamp, gatherCoord).w; float luma_reference = DecodeColorY(luma_reference32); - float4 mda = MotionDepthAlphaBuffer[InputPos].xyzw; //motion depth alpha + float4 mda = MotionDepthAlphaBuffer[globalInvocationID.xy].xyzw; //motion depth alpha float depth = mda.z; float alphamask = mda.w; float2 motion = mda.xy; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl index 17d12fa..706a3ec 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl @@ -28,6 +28,7 @@ cbuffer Params : register(b0) float2 ViewportSizeInverse; float2 displaySizeRcp; float2 jitterOffset; + float2 padding1; float4 clipToPrevClip[4]; float preExposure; float cameraFovAngleHor; @@ -41,12 +42,11 @@ SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); [numthreads(8, 8, 1)] -void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) +void CS(uint3 globalInvocationID : SV_DispatchThreadID) { - uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId; - - float2 gatherCoord = float2(InputPos) * ViewportSizeInverse; + float2 gatherCoord = float2(globalInvocationID.xy) * ViewportSizeInverse; float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; + uint2 InputPos = globalInvocationID.xy; //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h //FindNearestDepth diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl index acadb9d..b9bf7f2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl @@ -40,6 +40,7 @@ cbuffer Params : register(b0) float2 renderSizeRcp; float2 displaySizeRcp; float2 jitterOffset; + float2 padding1; float4 clipToPrevClip[4]; float preExposure; float cameraFovAngleHor; @@ -53,10 +54,8 @@ SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); [numthreads(8, 8, 1)] -void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) +void CS(uint3 globalInvocationID : SV_DispatchThreadID) { - uint2 InvocationID = GroupId * uint2(8, 8) + GroupThreadId; - float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99); //Biasmax_viewportXScale float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0)); float f2 = preExposure; //1.0; //preExposure @@ -64,7 +63,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) float2 HistoryInfoViewportSize = float2(displaySize); float2 InputJitter = jitterOffset; float2 InputInfoViewportSize = float2(renderSize); - float2 Hruv = (float2(InvocationID) + 0.5f) * HistoryInfoViewportSizeInverse; + float2 Hruv = (float2(globalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; float2 Jitteruv; Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); @@ -297,7 +296,7 @@ void CS(uint2 GroupId : 
SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0); Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); - HistoryOutput[InvocationID.xy] = float4(Upsampledcw.xyz, Wfactor); + HistoryOutput[globalInvocationID.xy] = float4(Upsampledcw.xyz, Wfactor); ////ycocg to grb float x_z = Upsampledcw.x - Upsampledcw.z; @@ -311,5 +310,5 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID) float scale = preExposure / ((1.0f + 1.0f / 65504.0f) - compMax); //(1.0f + 1.0f / 65504.0f) = 1.000015e+00 Upsampledcw.xyz = Upsampledcw.xyz * scale; - SceneColorOutput[InvocationID.xy] = Upsampledcw; + SceneColorOutput[globalInvocationID.xy] = Upsampledcw; } From 708e4308c5ff55d6ed20145dcd439c942d3d345e Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 20 Dec 2024 22:01:37 +0100 Subject: [PATCH 02/88] Consolidated SGSR2 shaders into single .compute files as there's not much point in using includes, and grouped them together into a 3-pass folder. This will make it easier to add the other variants later. 
--- .../Shaders/{shaders.meta => 3_pass_cs.meta} | 2 +- .../sgsr2_activate.compute} | 16 +++++--- .../sgsr2_activate.compute.meta | 0 .../sgsr2_convert.compute} | 12 +++--- .../sgsr2_convert.compute.meta | 0 .../sgsr2_upscale.compute} | 38 ++++++++++--------- .../sgsr2_upscale.compute.meta | 0 .../SGSR2/Shaders/sgsr2_activate.compute | 3 -- .../SGSR2/Shaders/sgsr2_convert.compute | 3 -- .../SGSR2/Shaders/sgsr2_upscale.compute | 3 -- .../Shaders/shaders/sgsr2_activate.hlsl.meta | 7 ---- .../Shaders/shaders/sgsr2_convert.hlsl.meta | 7 ---- .../Shaders/shaders/sgsr2_upscale.hlsl.meta | 7 ---- 13 files changed, 38 insertions(+), 60 deletions(-) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{shaders.meta => 3_pass_cs.meta} (77%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{shaders/sgsr2_activate.hlsl => 3_pass_cs/sgsr2_activate.compute} (93%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{ => 3_pass_cs}/sgsr2_activate.compute.meta (100%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{shaders/sgsr2_convert.hlsl => 3_pass_cs/sgsr2_convert.compute} (95%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{ => 3_pass_cs}/sgsr2_convert.compute.meta (100%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{shaders/sgsr2_upscale.hlsl => 3_pass_cs/sgsr2_upscale.compute} (93%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/{ => 3_pass_cs}/sgsr2_upscale.compute.meta (100%) delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute delete mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta similarity index 77% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta index 0d0996c..c7f1f9e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 81bb130e0ef32fa4fb623a65d2f2116e +guid: 30c2581cbd096d349bf552c4b0886d84 folderAsset: yes DefaultImporter: externalObjects: {} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute similarity index 93% rename from 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 20b8b05..53223c3 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -1,3 +1,7 @@ +#pragma kernel CS + +// TODO: what about REQUEST_NDC_Y_UP? Might be graphics API-dependent, look at Unity's shader includes. + //============================================================================================================ // // @@ -9,8 +13,8 @@ #define EPSILON 1.19e-07f float DecodeColorY(uint sample32) { - uint x11 = sample32 >> 21u; - return float(x11) * (1.0 / 2047.5); + uint x11 = sample32 >> 21u; + return float(x11) * (1.0 / 2047.5); } uint packHalf2x16(float2 value) @@ -66,10 +70,10 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) uint luma_reference32 = YCoCgColor.GatherRed(s_PointClamp, gatherCoord).w; float luma_reference = DecodeColorY(luma_reference32); - float4 mda = MotionDepthAlphaBuffer[globalInvocationID.xy].xyzw; //motion depth alpha - float depth = mda.z; - float alphamask = mda.w; - float2 motion = mda.xy; + float4 mda = MotionDepthAlphaBuffer[globalInvocationID.xy].xyzw; //motion depth alpha + float depth = mda.z; + float alphamask = mda.w; + float2 motion = mda.xy; #ifdef REQUEST_NDC_Y_UP float2 PrevUV = float2(-0.5f * motion.x + ViewportUV.x, 0.5f * motion.y + ViewportUV.y); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute.meta similarity index 
100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute similarity index 95% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 706a3ec..2b8ea0e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -1,3 +1,5 @@ +#pragma kernel CS + //============================================================================================================ // // @@ -57,15 +59,15 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float4 topleft = InputDepth.GatherRed(s_PointClamp, gatherCoord); NearestZ = max(topleft.x, NearestZ); - NearestZ = max(topleft.y, NearestZ); - NearestZ = max(topleft.z, NearestZ); - NearestZ = max(topleft.w, NearestZ); + NearestZ = max(topleft.y, NearestZ); + NearestZ = max(topleft.z, NearestZ); + NearestZ = max(topleft.w, NearestZ); float2 v11 = float2(ViewportSizeInverse.x, 0.0); float2 topRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v11)).yz; - NearestZ = max(topRight.x, NearestZ); - NearestZ = max(topRight.y, NearestZ); + NearestZ = max(topRight.x, NearestZ); + NearestZ = max(topRight.y, NearestZ); float2 v13 = 
float2(0.0, ViewportSizeInverse.y); float2 bottomLeft = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v13)).xy; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute.meta similarity index 100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute similarity index 93% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index b9bf7f2..0c39ebe 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -1,3 +1,5 @@ +#pragma kernel CS + //============================================================================================================ // // @@ -8,23 +10,23 @@ float FastLanczos(float base) { - float y = base - 1.0f; - float y2 = y * y; - float y_temp = 0.75f * y + y2; - return y_temp * y2; + float y = base - 1.0f; + float y2 = y * y; + float y_temp = 0.75f * y + y2; + return y_temp * y2; } float3 DecodeColor(uint sample32) { - uint x11 = sample32 >> 
21u; - uint y11 = sample32 & (2047u << 10u); - uint z10 = sample32 & 1023u; - float3 samplecolor; - samplecolor.x = (float(x11) * (1.0 / 2047.5)); - samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; - samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5; + uint x11 = sample32 >> 21u; + uint y11 = sample32 & (2047u << 10u); + uint z10 = sample32 & 1023u; + float3 samplecolor; + samplecolor.x = (float(x11) * (1.0 / 2047.5)); + samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; + samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5; - return samplecolor; + return samplecolor; } Texture2D PrevHistoryOutput : register(t0); @@ -266,9 +268,9 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.05f, rectboxmax+0.05f); Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ; - float tcontribute = history_value * clamp(rectboxvar.x * 10.0f, 0.0, 1.0); - float OneMinusWfactor = 1.0f - Wfactor; - tcontribute = tcontribute * OneMinusWfactor; + float tcontribute = history_value * clamp(rectboxvar.x * 10.0f, 0.0, 1.0); + float OneMinusWfactor = 1.0f - Wfactor; + tcontribute = tcontribute * OneMinusWfactor; float baseupdate = OneMinusWfactor - OneMinusWfactor * depthfactor; baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0))); @@ -285,9 +287,9 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) rectboxmin = max(rectboxmin, boxmin); float3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); - float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? tcontribute : 1.0f; - lerpcontribution = lerpcontribution - lerpcontribution * sqrt(alphamask); - HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); + float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? 
tcontribute : 1.0f; + lerpcontribution = lerpcontribution - lerpcontribution * sqrt(alphamask); + HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); float basemin = min(basealpha, 0.1f); basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0)); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute.meta similarity index 100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute deleted file mode 100644 index 76a9d8a..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute +++ /dev/null @@ -1,3 +0,0 @@ -#pragma kernel CS - -#include "shaders/sgsr2_activate.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute deleted file mode 100644 index beb2024..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute +++ /dev/null @@ -1,3 +0,0 @@ -#pragma kernel CS - -#include "shaders/sgsr2_convert.hlsl" diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute deleted file mode 100644 index 70b4cae..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute +++ /dev/null @@ -1,3 +0,0 @@ -#pragma kernel CS - -#include "shaders/sgsr2_upscale.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta deleted file mode 100644 index 790c78a..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 503a58dfbaf241c4cbabb2b264d66f96 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta deleted file mode 100644 index 854e60d..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: d980c2a95e08a894d96b558154687e24 -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta deleted file mode 100644 index b81c841..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 5cde6f90c795fb841a38f37495375e6e -ShaderIncludeImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: From 14fa1bc7d26af8f45feefec62071b0f304444efb Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 20 Dec 2024 23:16:06 +0100 Subject: [PATCH 03/88] Initial port of SGSR2 2-pass compute shaders to HLSL (untested) --- .../PostProcessing/PostProcessResources.asset | 10 +- .../Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 24 +- .../Upscaling/SGSR2/Shaders/2_pass_cs.meta | 8 + .../Shaders/2_pass_cs/sgsr2_convert.compute | 136 +++++++ .../2_pass_cs/sgsr2_convert.compute.meta | 7 + .../Shaders/2_pass_cs/sgsr2_upscale.compute | 332 ++++++++++++++++++ .../2_pass_cs/sgsr2_upscale.compute.meta | 7 + .../Effects/Upscaling/SGSR2Upscaler.cs | 8 +- 8 files changed, 519 insertions(+), 13 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset index e79716f..5c81a3d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset @@ -150,6 +150,10 @@ MonoBehaviour: tcrAutoGenPass: {fileID: 7200000, guid: 75cdc6ef23f08ed498d4da511923fcea, type: 3} debugViewPass: {fileID: 7200000, guid: cb24a71d54164c54eb5e86839acd48c5, type: 3} sgsr2Upscaler: - convert: {fileID: 7200000, guid: a41757aacd8b70e42a4001d514bfbe53, type: 3} - activate: {fileID: 7200000, guid: d7de362950af6fe4e90da7d6e32f9826, type: 3} - upscale: {fileID: 7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3} + twoPassCompute: + convert: {fileID: 7200000, guid: 073ee927fbee25841a31cf364834071c, type: 3} + upscale: {fileID: 7200000, guid: d7bacd7d04c6521499bef936d93921cc, type: 3} + threePassCompute: + convert: {fileID: 7200000, guid: a41757aacd8b70e42a4001d514bfbe53, type: 3} + activate: {fileID: 7200000, guid: d7de362950af6fe4e90da7d6e32f9826, type: 3} + upscale: {fileID: 7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index d6c658d..d719336 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -3,7 +3,7 @@ using System.Collections; using System.Collections.Generic; using UnityEngine; -public class SGSR2 +public static class SGSR2 { [Serializable] public struct Params @@ -26,10 +26,22 @@ public class SGSR2 [Serializable] public class Shaders { - public ComputeShader convert; - - public 
ComputeShader activate; - - public ComputeShader upscale; + public TwoPassCompute twoPassCompute; + public ThreePassCompute threePassCompute; + + [Serializable] + public class TwoPassCompute + { + public ComputeShader convert; + public ComputeShader upscale; + } + + [Serializable] + public class ThreePassCompute + { + public ComputeShader convert; + public ComputeShader activate; + public ComputeShader upscale; + } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta new file mode 100644 index 0000000..12e48c2 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: def18d58a2ff64f44a2d9f73e487a689 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute new file mode 100644 index 0000000..18c822a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -0,0 +1,136 @@ +#pragma kernel CS + +//============================================================================================================ +// +// +// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// +//============================================================================================================ + +float2 decodeVelocityFromTexture(float2 ev) { + const float inv_div = 1.0f / (0.499f * 0.5f); + float2 dv; + dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div; + //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f))); + return dv; +} + +Texture2D InputColor : register(t0); +Texture2D InputDepth : register(t1); +Texture2D InputVelocity : register(t2); +RWTexture2D MotionDepthClipAlphaBuffer : register(u0); +RWTexture2D YCoCgColor : register(u1); + +cbuffer Params : register(b0) +{ + uint2 renderSize; + uint2 displaySize; + float2 renderSizeRcp; + float2 displaySizeRcp; + float2 jitterOffset; + float2 padding1; + float4 clipToPrevClip[4]; + float preExposure; + float cameraFovAngleHor; + float cameraNear; + float MinLerpContribution; + uint bSameCamera; + uint reset; +}; + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + +[numthreads(8, 8, 1)] +void CS(uint3 globalInvocationID : SV_DispatchThreadID) +{ + float Exposure_co_rcp = preExposure; + float2 ViewportSizeInverse = displaySizeRcp.xy; + uint2 InputPos = globalInvocationID.xy; + + float2 gatherCoord = float2(globalInvocationID.xy) * ViewportSizeInverse; + float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; + + //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h + //FindNearestDepth + + float4 topleft = InputDepth.GatherRed(s_PointClamp, gatherCoord); + float2 v10 = float2(ViewportSizeInverse.x*2.0, 0.0); + float4 topRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord+v10)); + float2 v12 = float2(0.0, ViewportSizeInverse.y*2.0); + float4 bottomLeft = InputDepth.GatherRed(s_PointClamp, (gatherCoord+v12)); + float2 v14 = float2(ViewportSizeInverse.x*2.0, ViewportSizeInverse.y*2.0); + float4 bottomRight = InputDepth.GatherRed(s_PointClamp, 
(gatherCoord+v14)); + float maxC = max(max(max(topleft.y,topRight.x),bottomLeft.z),bottomRight.w); + float topleft4 = max(max(max(topleft.y,topleft.x),topleft.z),topleft.w); + float topLeftMax9 = max(bottomLeft.w,max(max(maxC,topleft4),topRight.w)); + + float depthclip = 0.0; + if (maxC > 1.0e-05f) + { + float topRight4 = max(max(max(topRight.y,topRight.x),topRight.z),topRight.w); + float bottomLeft4 = max(max(max(bottomLeft.y,bottomLeft.x),bottomLeft.z),bottomLeft.w); + float bottomRight4 = max(max(max(bottomRight.y,bottomRight.x),bottomRight.z),bottomRight.w); + + float Wdepth = 0.0; + float Ksep = 1.37e-05f; + float Kfov = cameraFovAngleHor; + float diagonal_length = length(float2(renderSize)); + float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length; + + float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC); + float EPSILON = 1.19e-07f; + Wdepth += clamp((Depthsep / (abs(maxC - topleft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - bottomLeft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - bottomRight4) + EPSILON)), 0.0, 1.0); + depthclip = clamp(1.0f - Wdepth*0.25, 0.0, 1.0); + } + + //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion + + float2 EncodedVelocity = InputVelocity[InputPos]; + + float2 motion; + if (EncodedVelocity.x > 0.0) + { + motion = decodeVelocityFromTexture(EncodedVelocity.xy); + } + else + { +#ifdef REQUEST_NDC_Y_UP + float2 ScreenPos = float2(2.0f * ViewportUV.x - 1.0f, 1.0f - 2.0f * ViewportUV.y); +#else + float2 ScreenPos = float2(2.0f * ViewportUV - 1.0f); +#endif + float3 Position = float3(ScreenPos, topLeftMax9); //this_clip + float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x))); + float2 PreScreen = PreClip.xy / PreClip.w; + motion = Position.xy - PreScreen; + } + + motion = 
EncodedVelocity; + + ////////////compute luma + float3 Colorrgb = InputColor[InputPos].xyz; + + ///simple tonemap + float ColorMax = max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + Exposure_co_rcp; + Colorrgb /= ColorMax; + + float3 Colorycocg; + Colorycocg.x = 0.25 * (Colorrgb.x + 2.0 * Colorrgb.y + Colorrgb.z); + Colorycocg.y = clamp(0.5 * Colorrgb.x + 0.5 - 0.5 * Colorrgb.z, 0.0, 1.0); + Colorycocg.z = clamp(Colorycocg.x + Colorycocg.y - Colorrgb.x, 0.0, 1.0); + + //now color YCoCG all in the range of [0,1] + uint x11 = uint(Colorycocg.x * 2047.5); + uint y11 = uint(Colorycocg.y * 2047.5); + uint z10 = uint(Colorycocg.z * 1023.5); + + YCoCgColor[InputPos] = ((x11 << 21u) | (y11 << 10u)) | z10; + + float4 v29 = float4(motion, depthclip, ColorMax); + MotionDepthClipAlphaBuffer[InputPos] = v29; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta new file mode 100644 index 0000000..6b0cc00 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 073ee927fbee25841a31cf364834071c +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute new file mode 100644 index 0000000..6c541d2 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -0,0 +1,332 @@ +#pragma kernel CS + 
+//============================================================================================================ +// +// +// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. +// SPDX-License-Identifier: BSD-3-Clause +// +//============================================================================================================ + +float FastLanczos(float base) +{ + float y = base - 1.0f; + float y2 = y * y; + float y_temp = 0.75f * y + y2; + return y_temp * y2; +} + +float3 DecodeColor(uint sample32) +{ + uint x11 = sample32 >> 21u; + uint y11 = sample32 & (2047u << 10u); + uint z10 = sample32 & 1023u; + float3 samplecolor; + samplecolor.x = (float(x11) * (1.0 / 2047.5)); + samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; + samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5; + + return samplecolor; +} + +Texture2D PrevHistoryOutput : register(t0); +Texture2D MotionDepthClipAlphaBuffer : register(t1); +Texture2D YCoCgColor : register(t2); +RWTexture2D SceneColorOutput : register(u0); +RWTexture2D HistoryOutput : register(u1); + +cbuffer Params : register(b0) +{ + uint2 renderSize; + uint2 displaySize; + float2 renderSizeRcp; + float2 displaySizeRcp; + float2 jitterOffset; + float2 padding1; + float4 clipToPrevClip[4]; + float preExposure; + float cameraFovAngleHor; + float cameraNear; + float MinLerpContribution; + uint bSameCamera; + uint reset; +}; + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + +[numthreads(8, 8, 1)] +void CS(uint3 globalInvocationID : SV_DispatchThreadID) +{ + float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99); //Biasmax_viewportXScale + float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0)); + float f2 = preExposure; //1.0; //preExposure + float2 HistoryInfoViewportSizeInverse = displaySizeRcp; + float2 HistoryInfoViewportSize = float2(displaySize); + float2 
InputJitter = jitterOffset; + float2 InputInfoViewportSize = float2(renderSize); + float2 Hruv = (float2(globalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; + float2 Jitteruv; + Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); + + int2 InputPos = int2(Jitteruv * InputInfoViewportSize); + float4 mda = MotionDepthClipAlphaBuffer.SampleLevel(s_LinearClamp, Jitteruv, 0).xyzw; + float2 Motion = mda.xy; + + ///ScreenPosToViewportScale&Bias + float2 PrevUV; + PrevUV.x = clamp(-0.5 * Motion.x + Hruv.x, 0.0, 1.0); +#ifdef REQUEST_NDC_Y_UP + PrevUV.y = clamp(0.5 * Motion.y + Hruv.y, 0.0, 1.0); +#else + PrevUV.y = clamp(-0.5 * Motion.y + Hruv.y, 0.0, 1.0); +#endif + + float depthfactor = mda.z; + float ColorMax = mda.w; + + float4 History = PrevHistoryOutput.SampleLevel(s_LinearClamp, PrevUV, 0); + float3 HistoryColor = History.xyz; + float Historyw = History.w; + float Wfactor = clamp(abs(Historyw), 0.0, 1.0); + + /////upsample and compute box + float4 Upsampledcw = 0.0f; + float kernelfactor = clamp(Wfactor + float(reset), 0.0, 1.0); + float biasmax = Biasmax_viewportXScale - Biasmax_viewportXScale * kernelfactor; + float biasmin = max(1.0f, 0.3 + 0.3 * biasmax); + float biasfactor = max(0.25f * depthfactor, kernelfactor); + float kernelbias = lerp(biasmax, biasmin, biasfactor); + float motion_viewport_len = length(Motion * HistoryInfoViewportSize); + float curvebias = lerp(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0)); + + float3 rectboxcenter = 0.0f; + float3 rectboxvar = 0.0f; + float rectboxweight = 0.0f; + float2 srcpos = float2(InputPos) + 0.5f - InputJitter; + float2 srcOutputPos = Hruv * InputInfoViewportSize; + + kernelbias *= 0.5f; + float kernelbias2 = kernelbias * kernelbias; + float2 srcpos_srcOutputPos = srcpos - srcOutputPos; + + int2 InputPosBtmRight = 1 + InputPos; + float2 gatherCoord = 
float2(InputPos) * renderSizeRcp; + uint btmRight = YCoCgColor[InputPosBtmRight].x; + uint4 topleft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord); + uint2 topRight = 0; + uint2 bottomLeft = 0; + + uint sameCameraFrmNum = bSameCamera; + + if (sameCameraFrmNum!=0u) + { + topRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz; + bottomLeft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(0.0, renderSizeRcp.y)).xy; + } + else + { + uint2 btmRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, renderSizeRcp.y)).xz; + bottomLeft.y = btmRight.x; + topRight.x = btmRight.y; + } + + float3 rectboxmin; + float3 rectboxmax; + { + float3 samplecolor = DecodeColor(bottomLeft.y); + float2 baseoffset = srcpos_srcOutputPos + float2(0.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = samplecolor; + rectboxmax = samplecolor; + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(topRight.x); + float2 baseoffset = srcpos_srcOutputPos + float2(1.0, 0.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(topleft.x); + float2 baseoffset = srcpos_srcOutputPos + 
float2(-1.0, 0.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(topleft.y); + float2 baseoffset = srcpos_srcOutputPos; + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(topleft.z); + float2 baseoffset = srcpos_srcOutputPos + float2(0.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + + if (sameCameraFrmNum!=0u) + { + { + float3 samplecolor = DecodeColor(btmRight); + float2 baseoffset = srcpos_srcOutputPos + float2(1.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0); + float 
weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(bottomLeft.x); + float2 baseoffset = srcpos_srcOutputPos + float2(-1.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + float3 samplecolor = DecodeColor(topRight.y); + float2 baseoffset = srcpos_srcOutputPos + float2(1.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + + { + float3 samplecolor = DecodeColor(topleft.w); + float2 baseoffset = srcpos_srcOutputPos + float2(-1.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += float4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = 
min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + float3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + } + + rectboxweight = 1.0 / rectboxweight; + rectboxcenter *= rectboxweight; + rectboxvar *= rectboxweight; + rectboxvar = sqrt(abs(rectboxvar - rectboxcenter * rectboxcenter)); + + Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.05f, rectboxmax+0.05f); + Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ; + + float OneMinusWfactor = 1.0f - Wfactor; + + float baseupdate = OneMinusWfactor - OneMinusWfactor * depthfactor; + baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0))); + baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); + float basealpha = baseupdate; + + const float EPSILON = 1.192e-07f; + float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); + float boxsize = lerp(scalefactor, 1.0f, boxscale); + float3 sboxvar = rectboxvar * boxsize; + float3 boxmin = rectboxcenter - sboxvar; + float3 boxmax = rectboxcenter + sboxvar; + rectboxmax = min(rectboxmax, boxmax); + rectboxmin = max(rectboxmin, boxmin); + + float3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); + float startLerpValue = MinLerpContribution; //MinLerpContribution; //MinLerpContribution; + if ((abs(mda.x) + abs(mda.y)) > 0.000001) startLerpValue = 0.0; + float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? 
startLerpValue : 1.0f; + + HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); + float basemin = min(basealpha, 0.1f); + basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0)); + + ////blend color + float alphasum = max(EPSILON, basealpha + Upsampledcw.w); + float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0); + Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); + + HistoryOutput[globalInvocationID.xy] = float4(Upsampledcw.xyz, Wfactor); + + ////ycocg to rgb + float x_z = Upsampledcw.x - Upsampledcw.z; + Upsampledcw.xyz = float3( + clamp(x_z + Upsampledcw.y, 0.0, 1.0), + clamp(Upsampledcw.x + Upsampledcw.z, 0.0, 1.0), + clamp(x_z - Upsampledcw.y, 0.0, 1.0)); + + float compMax = max(Upsampledcw.x, Upsampledcw.y); + compMax = clamp(max(compMax, Upsampledcw.z), 0.0f, 1.0f); + float scale = preExposure / ((1.0f + 600.0f / 65504.0f) - compMax); + + if (ColorMax > 4000.0f) scale = ColorMax; + Upsampledcw.xyz = Upsampledcw.xyz * scale; + SceneColorOutput[globalInvocationID.xy] = Upsampledcw; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta new file mode 100644 index 0000000..ff1bbc3 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: d7bacd7d04c6521499bef936d93921cc +ComputeShaderImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs index 5aa66cc..928ae3d 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs @@ -53,7 +53,7 @@ namespace UnityEngine.Rendering.PostProcessing parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); parms.jitterOffset = config.JitterOffset; - parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip + parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) parms.preExposure = config.preExposure; parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; parms.cameraNear = context.camera.nearClipPlane; @@ -84,7 +84,7 @@ namespace UnityEngine.Rendering.PostProcessing private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) { - var shader = context.resources.computeShaders.sgsr2Upscaler.convert; + var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.convert; int kernelIndex = shader.FindKernel("CS"); cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); @@ -103,7 +103,7 @@ namespace UnityEngine.Rendering.PostProcessing private void Activate(CommandBuffer cmd, PostProcessRenderContext context) { - var shader = context.resources.computeShaders.sgsr2Upscaler.activate; + var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.activate; int kernelIndex = shader.FindKernel("CS"); uint frameIndex = _frameCount % 2; @@ -122,7 +122,7 @@ namespace UnityEngine.Rendering.PostProcessing private void Upscale(CommandBuffer cmd, PostProcessRenderContext context) { - var shader = context.resources.computeShaders.sgsr2Upscaler.upscale; + var shader = 
context.resources.computeShaders.sgsr2Upscaler.threePassCompute.upscale; int kernelIndex = shader.FindKernel("CS"); uint frameIndex = _frameCount % 2; From d4c1d3f56e5bdd6c2d3d936df3945a9226a7cfde Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 20 Dec 2024 23:34:21 +0100 Subject: [PATCH 04/88] Made SGSR2 upscaler plugin abstract and split off 3-pass CS version into its own separate class --- .../Runtime/Effects/Upscaling.cs | 5 +- .../Effects/Upscaling/SGSR2Upscaler.cs | 113 ++---------------- .../Upscaling/SGSR2Upscaler_3PassCS.cs | 104 ++++++++++++++++ .../Upscaling/SGSR2Upscaler_3PassCS.cs.meta | 3 + 4 files changed, 118 insertions(+), 107 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index 235e09f..0db42ce 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -16,7 +16,8 @@ namespace UnityEngine.Rendering.PostProcessing [InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2, [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3, //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR, - [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2, + [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Compute")] SGSR2_2PassCS, + [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 3-Pass Compute")] SGSR2_3PassCS, [InspectorName("PlayStation Spectral Super Resolution (PSSR)")] PSSR, } @@ -180,7 +181,7 @@ namespace UnityEngine.Rendering.PostProcessing { UpscalerType.FSR2 when 
FSR2Upscaler.IsSupported => new FSR2Upscaler(), UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(), - UpscalerType.SGSR2 when SGSR2Upscaler.IsSupported => new SGSR2Upscaler(), + UpscalerType.SGSR2_3PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_3PassCS(), _ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware }; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs index 928ae3d..615491e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs @@ -4,19 +4,19 @@ using UnityEngine.Experimental.Rendering; namespace UnityEngine.Rendering.PostProcessing { - internal class SGSR2Upscaler: Upscaler + internal abstract class SGSR2Upscaler: Upscaler { public static bool IsSupported => SystemInfo.supportsComputeShaders; - private RenderTexture _colorLuma; - private RenderTexture _motionDepthAlpha; - private RenderTexture _motionDepthClipAlpha; - private readonly RenderTexture[] _lumaHistory = new RenderTexture[2]; - private readonly RenderTexture[] _upscaleHistory = new RenderTexture[2]; + protected RenderTexture _colorLuma; + protected RenderTexture _motionDepthAlpha; + protected RenderTexture _motionDepthClipAlpha; + protected readonly RenderTexture[] _lumaHistory = new RenderTexture[2]; + protected readonly RenderTexture[] _upscaleHistory = new RenderTexture[2]; - private readonly ConstantsBuffer _paramsBuffer = new(); + protected readonly ConstantsBuffer _paramsBuffer = new(); - private uint _frameCount = 0; + protected uint _frameCount = 0; public override void CreateContext(PostProcessRenderContext context, Upscaling config) { @@ -41,102 +41,5 @@ namespace 
UnityEngine.Rendering.PostProcessing DestroyRenderTexture(ref _motionDepthAlpha); DestroyRenderTexture(ref _colorLuma); } - - public override void Render(PostProcessRenderContext context, Upscaling config) - { - var cmd = context.command; - cmd.BeginSample("SGSR2"); - - ref var parms = ref _paramsBuffer.Value; - parms.renderSize = config.GetScaledRenderSize(context.camera); - parms.displaySize = config.UpscaleSize; - parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); - parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); - parms.jitterOffset = config.JitterOffset; - parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) - parms.preExposure = config.preExposure; - parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; - parms.cameraNear = context.camera.nearClipPlane; - parms.minLerpContribution = 0f; - parms.bSameCamera = 0u; - parms.reset = config.Reset ? 
1u : 0u; - _paramsBuffer.UpdateBufferData(cmd); - - if (_frameCount == 0 || config.Reset) - { - cmd.SetRenderTarget(_lumaHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_lumaHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - } - - Convert(cmd, context, config); - Activate(cmd, context); - Upscale(cmd, context); - - cmd.EndSample("SGSR2"); - _frameCount++; - } - - private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) - { - var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.convert; - int kernelIndex = shader.FindKernel("CS"); - - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); - cmd.SetComputeTextureParam(shader, kernelIndex, "InputOpaqueColor", config.ColorOpaqueOnly); - cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source); - cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth); - cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors); - cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha); - cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); - - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1); - } - - private void Activate(CommandBuffer cmd, PostProcessRenderContext context) - { - var shader = 
context.resources.computeShaders.sgsr2Upscaler.threePassCompute.activate; - int kernelIndex = shader.FindKernel("CS"); - uint frameIndex = _frameCount % 2; - - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); - cmd.SetComputeTextureParam(shader, kernelIndex, "PrevLumaHistory", _lumaHistory[frameIndex ^ 1]); - cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha); - cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); - cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); - cmd.SetComputeTextureParam(shader, kernelIndex, "LumaHistory", _lumaHistory[frameIndex]); - - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1); - } - - private void Upscale(CommandBuffer cmd, PostProcessRenderContext context) - { - var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.upscale; - int kernelIndex = shader.FindKernel("CS"); - uint frameIndex = _frameCount % 2; - - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); - cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]); - cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); - cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); - cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination); - cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]); - - const int threadGroupWorkRegionDim = 8; - int dispatchDstX = (_paramsBuffer.Value.displaySize.x + 
(threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1); - } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs new file mode 100644 index 0000000..a41aac7 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs @@ -0,0 +1,104 @@ +using System.Runtime.InteropServices; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal class SGSR2Upscaler_3PassCS: SGSR2Upscaler + { + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample("SGSR2 3-Pass CS"); + + ref var parms = ref _paramsBuffer.Value; + parms.renderSize = config.GetScaledRenderSize(context.camera); + parms.displaySize = config.UpscaleSize; + parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); + parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); + parms.jitterOffset = config.JitterOffset; + parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) + parms.preExposure = config.preExposure; + parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; + parms.cameraNear = context.camera.nearClipPlane; + parms.minLerpContribution = 0f; + parms.bSameCamera = 0u; + parms.reset = config.Reset ? 
1u : 0u; + _paramsBuffer.UpdateBufferData(cmd); + + if (_frameCount == 0 || config.Reset) + { + cmd.SetRenderTarget(_lumaHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_lumaHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + } + + Convert(cmd, context, config); + Activate(cmd, context); + Upscale(cmd, context); + + cmd.EndSample("SGSR2 3-Pass CS"); + _frameCount++; + } + + private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) + { + var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.convert; + int kernelIndex = shader.FindKernel("CS"); + + cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputOpaqueColor", config.ColorOpaqueOnly); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); + + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1); + } + + private void Activate(CommandBuffer cmd, PostProcessRenderContext context) + { + var shader = 
context.resources.computeShaders.sgsr2Upscaler.threePassCompute.activate; + int kernelIndex = shader.FindKernel("CS"); + uint frameIndex = _frameCount % 2; + + cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeTextureParam(shader, kernelIndex, "PrevLumaHistory", _lumaHistory[frameIndex ^ 1]); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "LumaHistory", _lumaHistory[frameIndex]); + + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1); + } + + private void Upscale(CommandBuffer cmd, PostProcessRenderContext context) + { + var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.upscale; + int kernelIndex = shader.FindKernel("CS"); + uint frameIndex = _frameCount % 2; + + cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); + cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination); + cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]); + + const int threadGroupWorkRegionDim = 8; + int dispatchDstX = (_paramsBuffer.Value.displaySize.x + 
(threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1); + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta new file mode 100644 index 0000000..4f78d31 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 78943b31437146f29ec0a7d8d67eb5cc +timeCreated: 1734733770 \ No newline at end of file From 0f7baf0b52ea2fc0e6799ca442f022117e859eb3 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 20 Dec 2024 23:40:04 +0100 Subject: [PATCH 05/88] Implemented 2-pass CS dispatch --- .../Runtime/Effects/Upscaling.cs | 1 + .../Upscaling/SGSR2Upscaler_2PassCS.cs | 83 +++++++++++++++++++ .../Upscaling/SGSR2Upscaler_2PassCS.cs.meta | 3 + 3 files changed, 87 insertions(+) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index 0db42ce..9848437 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -181,6 +181,7 @@ namespace UnityEngine.Rendering.PostProcessing { UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(), UpscalerType.FSR3 when 
FSR3Upscaler.IsSupported => new FSR3Upscaler(), + UpscalerType.SGSR2_2PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_2PassCS(), UpscalerType.SGSR2_3PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_3PassCS(), _ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware }; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs new file mode 100644 index 0000000..c2a7600 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs @@ -0,0 +1,83 @@ +using System.Runtime.InteropServices; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal class SGSR2Upscaler_2PassCS: SGSR2Upscaler + { + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample("SGSR2 2-Pass CS"); + + ref var parms = ref _paramsBuffer.Value; + parms.renderSize = config.GetScaledRenderSize(context.camera); + parms.displaySize = config.UpscaleSize; + parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); + parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); + parms.jitterOffset = config.JitterOffset; + parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) + parms.preExposure = config.preExposure; + parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; + parms.cameraNear = context.camera.nearClipPlane; + parms.minLerpContribution = 0f; + parms.bSameCamera = 0u; + parms.reset = config.Reset ? 
1u : 0u; + _paramsBuffer.UpdateBufferData(cmd); + + if (_frameCount == 0 || config.Reset) + { + cmd.SetRenderTarget(_lumaHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_lumaHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + } + + Convert(cmd, context, config); + Upscale(cmd, context); + + cmd.EndSample("SGSR2 2-Pass CS"); + _frameCount++; + } + + private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) + { + var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.convert; + int kernelIndex = shader.FindKernel("CS"); + + cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth); + cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); + + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1); + } + + private void Upscale(CommandBuffer cmd, PostProcessRenderContext context) + { + var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.upscale; + int kernelIndex = shader.FindKernel("CS"); + uint 
frameIndex = _frameCount % 2; + + cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]); + cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); + cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); + cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination); + cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]); + + const int threadGroupWorkRegionDim = 8; + int dispatchDstX = (_paramsBuffer.Value.displaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1); + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta new file mode 100644 index 0000000..a49756d --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 3d5127688822e654084c665f84c0c3e0 +timeCreated: 1734733770 \ No newline at end of file From 7217b0fc324fedd1a132dac8a83e4cec8e3f9d15 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 12:51:23 +0100 Subject: [PATCH 06/88] Unity-fied all SGSR2 shaders by changing all texture declarations and accesses into TextureXR-style macros, with a shared BiRP definitions file. 
--- .../Shaders/2_pass_cs/sgsr2_convert.compute | 53 ++++++--------- .../Shaders/2_pass_cs/sgsr2_upscale.compute | 43 ++++++------- .../Shaders/3_pass_cs/sgsr2_activate.compute | 41 ++++++------ .../Shaders/3_pass_cs/sgsr2_convert.compute | 64 ++++++++----------- .../Shaders/3_pass_cs/sgsr2_upscale.compute | 43 ++++++------- .../Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl | 26 ++++++++ .../SGSR2/Shaders/sgsr2_birp.hlsl.meta | 3 + 7 files changed, 139 insertions(+), 134 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index 18c822a..8f076e7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -1,4 +1,5 @@ #pragma kernel CS +#include "../sgsr2_birp.hlsl" //============================================================================================================ // @@ -8,22 +9,13 @@ // //============================================================================================================ -float2 decodeVelocityFromTexture(float2 ev) { - const float inv_div = 1.0f / (0.499f * 0.5f); - float2 dv; - dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div; - //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f))); - return dv; -} - -Texture2D InputColor : register(t0); -Texture2D InputDepth : register(t1); -Texture2D 
InputVelocity : register(t2); -RWTexture2D MotionDepthClipAlphaBuffer : register(u0); -RWTexture2D YCoCgColor : register(u1); +TEXTURE2D_X(InputColor) : register(t0); +TYPED_TEXTURE2D_X(float, InputDepth) : register(t1); +TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2); +RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -cbuffer Params : register(b0) -{ +CBUFFER_START(Params) uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -37,31 +29,30 @@ cbuffer Params : register(b0) float MinLerpContribution; uint bSameCamera; uint reset; -}; - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); +CBUFFER_END [numthreads(8, 8, 1)] -void CS(uint3 globalInvocationID : SV_DispatchThreadID) +void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { + UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); + float Exposure_co_rcp = preExposure; float2 ViewportSizeInverse = displaySizeRcp.xy; - uint2 InputPos = globalInvocationID.xy; + uint2 InputPos = gl_GlobalInvocationID.xy; - float2 gatherCoord = float2(globalInvocationID.xy) * ViewportSizeInverse; + float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * ViewportSizeInverse; float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h //FindNearestDepth - float4 topleft = InputDepth.GatherRed(s_PointClamp, gatherCoord); + float4 topleft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord); float2 v10 = float2(ViewportSizeInverse.x*2.0, 0.0); - float4 topRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord+v10)); + float4 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v10)); float2 v12 = float2(0.0, ViewportSizeInverse.y*2.0); - float4 bottomLeft = InputDepth.GatherRed(s_PointClamp, (gatherCoord+v12)); + float4 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v12)); 
float2 v14 = float2(ViewportSizeInverse.x*2.0, ViewportSizeInverse.y*2.0); - float4 bottomRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord+v14)); + float4 bottomRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v14)); float maxC = max(max(max(topleft.y,topRight.x),bottomLeft.z),bottomRight.w); float topleft4 = max(max(max(topleft.y,topleft.x),topleft.z),topleft.w); float topLeftMax9 = max(bottomLeft.w,max(max(maxC,topleft4),topRight.w)); @@ -90,7 +81,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion - float2 EncodedVelocity = InputVelocity[InputPos]; + float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos); float2 motion; if (EncodedVelocity.x > 0.0) @@ -110,10 +101,8 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) motion = Position.xy - PreScreen; } - motion = EncodedVelocity; - ////////////compute luma - float3 Colorrgb = InputColor[InputPos].xyz; + float3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; ///simple tonemap float ColorMax = max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + Exposure_co_rcp; @@ -129,8 +118,8 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) uint y11 = uint(Colorycocg.y * 2047.5); uint z10 = uint(Colorycocg.z * 1023.5); - YCoCgColor[InputPos] = ((x11 << 21u) | (y11 << 10u)) | z10; + YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; float4 v29 = float4(motion, depthclip, ColorMax); - MotionDepthClipAlphaBuffer[InputPos] = v29; + MotionDepthClipAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = v29; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index 6c541d2..b4983ed 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -1,4 +1,5 @@ #pragma kernel CS +#include "../sgsr2_birp.hlsl" //============================================================================================================ // @@ -29,14 +30,13 @@ float3 DecodeColor(uint sample32) return samplecolor; } -Texture2D PrevHistoryOutput : register(t0); -Texture2D MotionDepthClipAlphaBuffer : register(t1); -Texture2D YCoCgColor : register(t2); -RWTexture2D SceneColorOutput : register(u0); -RWTexture2D HistoryOutput : register(u1); +TEXTURE2D_X(PrevHistoryOutput) : register(t0); +TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); +TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); +RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); -cbuffer Params : register(b0) -{ +CBUFFER_START(Params) uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -50,14 +50,13 @@ cbuffer Params : register(b0) float MinLerpContribution; uint bSameCamera; uint reset; -}; - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); +CBUFFER_END [numthreads(8, 8, 1)] -void CS(uint3 globalInvocationID : SV_DispatchThreadID) +void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { + UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); + float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99); //Biasmax_viewportXScale float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0)); float f2 = preExposure; //1.0; //preExposure @@ -65,13 +64,13 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float2 HistoryInfoViewportSize = float2(displaySize); float2 InputJitter = jitterOffset; 
float2 InputInfoViewportSize = float2(renderSize); - float2 Hruv = (float2(globalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; + float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; float2 Jitteruv; Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); int2 InputPos = int2(Jitteruv * InputInfoViewportSize); - float4 mda = MotionDepthClipAlphaBuffer.SampleLevel(s_LinearClamp, Jitteruv, 0).xyzw; + float4 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0).xyzw; float2 Motion = mda.xy; ///ScreenPosToViewportScale&Bias @@ -86,7 +85,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float depthfactor = mda.z; float ColorMax = mda.w; - float4 History = PrevHistoryOutput.SampleLevel(s_LinearClamp, PrevUV, 0); + float4 History = SAMPLE_TEXTURE2D_X_LOD(PrevHistoryOutput, S_LINEAR_CLAMP, PrevUV, 0); float3 HistoryColor = History.xyz; float Historyw = History.w; float Wfactor = clamp(abs(Historyw), 0.0, 1.0); @@ -113,8 +112,8 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) int2 InputPosBtmRight = 1 + InputPos; float2 gatherCoord = float2(InputPos) * renderSizeRcp; - uint btmRight = YCoCgColor[InputPosBtmRight].x; - uint4 topleft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord); + uint btmRight = LOAD_TEXTURE2D_X(YCoCgColor, InputPosBtmRight).x; + uint4 topleft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord); uint2 topRight = 0; uint2 bottomLeft = 0; @@ -122,12 +121,12 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) if (sameCameraFrmNum!=0u) { - topRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz; - bottomLeft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(0.0, renderSizeRcp.y)).xy; + topRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + 
float2(renderSizeRcp.x, 0.0)).yz; + bottomLeft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(0.0, renderSizeRcp.y)).xy; } else { - uint2 btmRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, renderSizeRcp.y)).xz; + uint2 btmRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(renderSizeRcp.x, renderSizeRcp.y)).xz; bottomLeft.y = btmRight.x; topRight.x = btmRight.y; } @@ -313,7 +312,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0); Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); - HistoryOutput[globalInvocationID.xy] = float4(Upsampledcw.xyz, Wfactor); + HistoryOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = float4(Upsampledcw.xyz, Wfactor); ////ycocg to rgb float x_z = Upsampledcw.x - Upsampledcw.z; @@ -328,5 +327,5 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) if (ColorMax > 4000.0f) scale = ColorMax; Upsampledcw.xyz = Upsampledcw.xyz * scale; - SceneColorOutput[globalInvocationID.xy] = Upsampledcw; + SceneColorOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = Upsampledcw; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 53223c3..fd2c83d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -1,4 +1,5 @@ #pragma kernel CS +#include "../sgsr2_birp.hlsl" // TODO: what about REQUEST_NDC_Y_UP? Might be graphics API-dependent, look at Unity's shader includes. 
@@ -27,14 +28,13 @@ float2 unpackHalf2x16(uint x) return f16tof32(uint2(x & 0xFFFF, x >> 16)); } -Texture2D PrevLumaHistory : register(t0); -Texture2D MotionDepthAlphaBuffer : register(t1); -Texture2D YCoCgColor : register(t2); -RWTexture2D MotionDepthClipAlphaBuffer : register(u0); -RWTexture2D LumaHistory : register(u1); +TYPED_TEXTURE2D_X(uint, PrevLumaHistory) : register(t0); +TEXTURE2D_X(MotionDepthAlphaBuffer) : register(t1); +TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); +RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(uint, LumaHistory) : register(u1); -cbuffer Params : register(b0) -{ +CBUFFER_START(Params) uint2 renderSize; uint2 displaySize; float2 ViewportSizeInverse; @@ -48,14 +48,13 @@ cbuffer Params : register(b0) float MinLerpContribution; uint bSameCamera; uint reset; -}; - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); +CBUFFER_END [numthreads(8, 8, 1)] -void CS(uint3 globalInvocationID : SV_DispatchThreadID) +void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { + UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); + int2 sampleOffset[4] = { int2(-1, -1), int2(-1, +0), @@ -63,14 +62,14 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) int2(+0, +0) }; - uint2 InputPos = globalInvocationID.xy; + uint2 InputPos = gl_GlobalInvocationID.xy; - float2 ViewportUV = (float2(globalInvocationID.xy) + 0.5f) * ViewportSizeInverse; + float2 ViewportUV = (float2(gl_GlobalInvocationID.xy) + 0.5f) * ViewportSizeInverse; float2 gatherCoord = ViewportUV + 0.5f * ViewportSizeInverse; - uint luma_reference32 = YCoCgColor.GatherRed(s_PointClamp, gatherCoord).w; + uint luma_reference32 = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord).w; float luma_reference = DecodeColorY(luma_reference32); - float4 mda = MotionDepthAlphaBuffer[globalInvocationID.xy].xyzw; //motion depth alpha + float4 mda = LOAD_TEXTURE2D_X(MotionDepthAlphaBuffer, 
gl_GlobalInvocationID.xy).xyzw; //motion depth alpha float depth = mda.z; float alphamask = mda.w; float2 motion = mda.xy; @@ -101,7 +100,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float Kfov = cameraFovAngleHor; float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length; for (int index = 0; index < 4; index+=2){ - float4 gPrevdepth = MotionDepthAlphaBuffer.GatherBlue(s_PointClamp, PrevUV, sampleOffset[index]); + float4 gPrevdepth = GATHER_BLUE_TEXTURE2D_X_OFFSET(MotionDepthAlphaBuffer, S_POINT_CLAMP, PrevUV, sampleOffset[index]); float tdepth1 = min(gPrevdepth.x, gPrevdepth.y); float tdepth2 = min(gPrevdepth.z, gPrevdepth.w); float fPrevdepth = min(tdepth1, tdepth2); @@ -110,7 +109,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float weight = Bilinweights[index]; Wdepth += clamp(Depthsep / (abs(fPrevdepth - depth) + EPSILON), 0.0, 1.0) * weight; - float2 gPrevdepth2 = MotionDepthAlphaBuffer.GatherBlue(s_PointClamp, PrevUV, sampleOffset[index + int(1)]).zw; + float2 gPrevdepth2 = GATHER_BLUE_TEXTURE2D_X_OFFSET(MotionDepthAlphaBuffer, S_POINT_CLAMP, PrevUV, sampleOffset[index + int(1)]).zw; fPrevdepth = min(min(gPrevdepth2.x, gPrevdepth2.y), tdepth2); Depthsep = Ksep_Kfov_diagonal * (1.0 - min(fPrevdepth, depth)); weight = Bilinweights[index + int(1)]; @@ -120,7 +119,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) } float2 current_luma_diff; - uint prev_luma_diff_pack = PrevLumaHistory.GatherRed(s_PointClamp, PrevUV).w; + uint prev_luma_diff_pack = GATHER_RED_TEXTURE2D_X(PrevLumaHistory, S_POINT_CLAMP, PrevUV).w; float2 prev_luma_diff; prev_luma_diff.x = unpackHalf2x16(prev_luma_diff_pack >> 16u).x; prev_luma_diff.y = unpackHalf2x16((prev_luma_diff_pack & uint(0xFFFF))).x; @@ -141,6 +140,6 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) } alphamask = floor(alphamask) + 0.5f * float((current_luma_diff.x != 0.0f) && (abs(current_luma_diff.y) != abs(luma_diff))); - LumaHistory[InputPos] = 
(packHalf2x16(float2(current_luma_diff.x, 0.0)) << 16u) | packHalf2x16(float2(current_luma_diff.y, 0.0)); - MotionDepthClipAlphaBuffer[InputPos] = float4(motion, depthclip, alphamask); + LumaHistory[COORD_TEXTURE2D_X(InputPos)] = (packHalf2x16(float2(current_luma_diff.x, 0.0)) << 16u) | packHalf2x16(float2(current_luma_diff.y, 0.0)); + MotionDepthClipAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, depthclip, alphamask); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 2b8ea0e..20ef3fd 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -1,4 +1,5 @@ #pragma kernel CS +#include "../sgsr2_birp.hlsl" //============================================================================================================ // @@ -8,23 +9,14 @@ // //============================================================================================================ -float2 decodeVelocityFromTexture(float2 ev) { - const float inv_div = 1.0f / (0.499f * 0.5f); - float2 dv; - dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div; - //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f))); - return dv; -} - -Texture2D InputOpaqueColor : register(t0); -Texture2D InputColor : register(t1); -Texture2D InputDepth : register(t2); -Texture2D InputVelocity : register(t3); -RWTexture2D MotionDepthAlphaBuffer : register(u0); -RWTexture2D YCoCgColor : register(u1); +TEXTURE2D_X(InputOpaqueColor) : register(t0); +TEXTURE2D_X(InputColor) : register(t1); +TYPED_TEXTURE2D_X(float, InputDepth) : register(t2); 
+TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); +RW_TEXTURE2D_X(float4, MotionDepthAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -cbuffer Params : register(b0) -{ +CBUFFER_START(Params) uint2 renderSize; uint2 displaySize; float2 ViewportSizeInverse; @@ -38,25 +30,26 @@ cbuffer Params : register(b0) float MinLerpContribution; uint bSameCamera; uint reset; -}; - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); +CBUFFER_END [numthreads(8, 8, 1)] -void CS(uint3 globalInvocationID : SV_DispatchThreadID) +void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { - float2 gatherCoord = float2(globalInvocationID.xy) * ViewportSizeInverse; + UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); + + half h0 = preExposure; + uint2 InputPos = gl_GlobalInvocationID.xy; + + float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * ViewportSizeInverse; float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; - uint2 InputPos = globalInvocationID.xy; //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h //FindNearestDepth int2 InputPosBtmRight = int2(1, 1) + int2(InputPos); - float NearestZ = InputDepth[InputPosBtmRight].x; + float NearestZ = LOAD_TEXTURE2D_X(InputDepth, InputPosBtmRight).x; - float4 topleft = InputDepth.GatherRed(s_PointClamp, gatherCoord); + float4 topleft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord); NearestZ = max(topleft.x, NearestZ); NearestZ = max(topleft.y, NearestZ); @@ -64,21 +57,20 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) NearestZ = max(topleft.w, NearestZ); float2 v11 = float2(ViewportSizeInverse.x, 0.0); - float2 topRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v11)).yz; + float2 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v11)).yz; NearestZ = max(topRight.x, NearestZ); NearestZ = max(topRight.y, NearestZ); float2 v13 = float2(0.0, ViewportSizeInverse.y); - float2 
bottomLeft = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v13)).xy; + float2 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v13)).xy; NearestZ = max(bottomLeft.x, NearestZ); NearestZ = max(bottomLeft.y, NearestZ); //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion - // TODO: wondering if this whole song and dance about decoding velocity is really necessary for Unity - float2 EncodedVelocity = InputVelocity[InputPos]; + float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos); float2 motion; if (EncodedVelocity.x > 0.0) @@ -98,13 +90,11 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) motion = Position.xy - PreScreen; } - motion = EncodedVelocity; - ////////////compute luma - float3 Colorrgb = InputColor[InputPos].xyz; + float3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; ///simple tonemap - Colorrgb /= max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + preExposure; + Colorrgb /= max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + h0; float3 Colorycocg; Colorycocg.x = 0.25 * (Colorrgb.x + 2.0 * Colorrgb.y + Colorrgb.z); @@ -116,14 +106,14 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) uint y11 = uint(Colorycocg.y * 2047.5); uint z10 = uint(Colorycocg.z * 1023.5); - float3 Colorprergb = InputOpaqueColor[InputPos].xyz; + float3 Colorprergb = LOAD_TEXTURE2D_X(InputOpaqueColor, InputPos).xyz; ///simple tonemap - Colorprergb /= max(max(Colorprergb.x, Colorprergb.y), Colorprergb.z) + preExposure; + Colorprergb /= max(max(Colorprergb.x, Colorprergb.y), Colorprergb.z) + h0; float3 delta = abs(Colorrgb - Colorprergb); float alpha_mask = max(delta.x, max(delta.y, delta.z)); alpha_mask = (0.35f * 1000.0f) * alpha_mask; - YCoCgColor[InputPos] = ((x11 << 21u) | (y11 << 10u)) | z10; - MotionDepthAlphaBuffer[InputPos] = float4(motion, NearestZ, alpha_mask); + YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; + 
MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, NearestZ, alpha_mask); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index 0c39ebe..2417c57 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -1,4 +1,5 @@ #pragma kernel CS +#include "../sgsr2_birp.hlsl" //============================================================================================================ // @@ -29,14 +30,13 @@ float3 DecodeColor(uint sample32) return samplecolor; } -Texture2D PrevHistoryOutput : register(t0); -Texture2D MotionDepthClipAlphaBuffer : register(t1); -Texture2D YCoCgColor : register(t2); -RWTexture2D SceneColorOutput : register(u0); -RWTexture2D HistoryOutput : register(u1); +TEXTURE2D_X(PrevHistoryOutput) : register(t0); +TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); +TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); +RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); -cbuffer Params : register(b0) -{ +CBUFFER_START(Params) uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -50,14 +50,13 @@ cbuffer Params : register(b0) float MinLerpContribution; uint bSameCamera; uint reset; -}; - -SamplerState s_PointClamp : register(s0); -SamplerState s_LinearClamp : register(s1); +CBUFFER_END [numthreads(8, 8, 1)] -void CS(uint3 globalInvocationID : SV_DispatchThreadID) +void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { + UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); + float Biasmax_viewportXScale = min(float(displaySize.x) / 
float(renderSize.x), 1.99); //Biasmax_viewportXScale float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0)); float f2 = preExposure; //1.0; //preExposure @@ -65,7 +64,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float2 HistoryInfoViewportSize = float2(displaySize); float2 InputJitter = jitterOffset; float2 InputInfoViewportSize = float2(renderSize); - float2 Hruv = (float2(globalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; + float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; float2 Jitteruv; Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); @@ -73,8 +72,8 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) int2 InputPos = int2(Jitteruv * InputInfoViewportSize); //float2 Motion = texelFetch(MotionDepthClipAlphaBuffer, InputPos, 0).xy; - float alphab = MotionDepthClipAlphaBuffer[InputPos].w; - float3 mda = MotionDepthClipAlphaBuffer.SampleLevel(s_LinearClamp, Jitteruv, 0).xyz; + float alphab = LOAD_TEXTURE2D_X(MotionDepthClipAlphaBuffer, InputPos).w; + float3 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0).xyz; float2 Motion = mda.xy; ///ScreenPosToViewportScale&Bias @@ -91,7 +90,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float alphamask = (alphab - history_value) * 0.001f; history_value *= 2.0; - float4 History = PrevHistoryOutput.SampleLevel(s_LinearClamp, PrevUV, 0); + float4 History = SAMPLE_TEXTURE2D_X_LOD(PrevHistoryOutput, S_LINEAR_CLAMP, PrevUV, 0); float3 HistoryColor = History.xyz; float Historyw = History.w; float Wfactor = max(clamp(abs(Historyw), 0.0, 1.0), alphamask); @@ -118,10 +117,10 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) int2 InputPosBtmRight = 1 + InputPos; float2 gatherCoord = 
float2(InputPos) * renderSizeRcp; - uint btmRight = YCoCgColor[InputPosBtmRight].x; - uint4 topleft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord); - uint2 topRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz; - uint2 bottomLeft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(0.0, renderSizeRcp.y)).xy; + uint btmRight = LOAD_TEXTURE2D_X(YCoCgColor, InputPosBtmRight).x; + uint4 topleft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord); + uint2 topRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz; + uint2 bottomLeft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(0.0, renderSizeRcp.y)).xy; float3 rectboxmin; float3 rectboxmax; @@ -298,7 +297,7 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0); Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); - HistoryOutput[globalInvocationID.xy] = float4(Upsampledcw.xyz, Wfactor); + HistoryOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = float4(Upsampledcw.xyz, Wfactor); ////ycocg to grb float x_z = Upsampledcw.x - Upsampledcw.z; @@ -312,5 +311,5 @@ void CS(uint3 globalInvocationID : SV_DispatchThreadID) float scale = preExposure / ((1.0f + 1.0f / 65504.0f) - compMax); //(1.0f + 1.0f / 65504.0f) = 1.000015e+00 Upsampledcw.xyz = Upsampledcw.xyz * scale; - SceneColorOutput[globalInvocationID.xy] = Upsampledcw; + SceneColorOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = Upsampledcw; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl new file mode 100644 index 0000000..c1e5d71 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl 
@@ -0,0 +1,26 @@ +#include "UnityCG.cginc" + +#define TEXTURE2D_X(textureName) Texture2D textureName +#define TYPED_TEXTURE2D_X(type, textureName) Texture2D textureName +#define RW_TEXTURE2D_X(type, textureName) RWTexture2D textureName + +#define COORD_TEXTURE2D_X(pixelCoord) pixelCoord + +#define LOAD_TEXTURE2D_X(textureName, unCoord2) textureName[unCoord2] +#define SAMPLE_TEXTURE2D_X_LOD(textureName, samplerName, coord2, lod) textureName.SampleLevel(samplerName, coord2, lod) +#define GATHER_RED_TEXTURE2D_X(textureName, samplerName, coord2) textureName.GatherRed(samplerName, coord2) +#define GATHER_BLUE_TEXTURE2D_X_OFFSET(textureName, samplerName, coord2, offset) textureName.GatherBlue(samplerName, coord2, offset) + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + +#define S_POINT_CLAMP s_PointClamp +#define S_LINEAR_CLAMP s_LinearClamp + +#define UNITY_XR_ASSIGN_VIEW_INDEX(viewIndex) + +inline float2 decodeVelocityFromTexture(float2 ev) +{ + // Nothing to do, motion vectors are not encoded + return ev; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta new file mode 100644 index 0000000..c250b46 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: f3edae28a3f74031996d08ca5a87c28e +timeCreated: 1734775340 \ No newline at end of file From 9bf6a12c97e05c388075ee43b0558e840eacd7f8 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 13:02:31 +0100 Subject: [PATCH 07/88] Minor optimization: use half float types wherever the original GLSL code uses mediump floats. 
--- .../SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute | 8 ++++---- .../SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute | 4 ++-- .../SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute | 4 ++-- .../SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute | 12 ++++++------ .../SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute | 4 ++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index 8f076e7..a41837d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -12,7 +12,7 @@ TEXTURE2D_X(InputColor) : register(t0); TYPED_TEXTURE2D_X(float, InputDepth) : register(t1); TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2); -RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); CBUFFER_START(Params) @@ -36,7 +36,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z); - float Exposure_co_rcp = preExposure; + half Exposure_co_rcp = preExposure; float2 ViewportSizeInverse = displaySizeRcp.xy; uint2 InputPos = gl_GlobalInvocationID.xy; @@ -102,7 +102,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) } ////////////compute luma - float3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; + half3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; ///simple tonemap float ColorMax = max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + Exposure_co_rcp; @@ -120,6 +120,6 @@ void CS(uint3 gl_GlobalInvocationID : 
SV_DispatchThreadID) YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; - float4 v29 = float4(motion, depthclip, ColorMax); + half4 v29 = half4(motion, depthclip, ColorMax); MotionDepthClipAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = v29; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index b4983ed..163af87 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -33,8 +33,8 @@ float3 DecodeColor(uint sample32) TEXTURE2D_X(PrevHistoryOutput) : register(t0); TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); -RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); +RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); CBUFFER_START(Params) uint2 renderSize; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index fd2c83d..2304fbd 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -29,9 +29,9 @@ float2 unpackHalf2x16(uint x) } TYPED_TEXTURE2D_X(uint, PrevLumaHistory) : register(t0); 
-TEXTURE2D_X(MotionDepthAlphaBuffer) : register(t1); +TYPED_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, LumaHistory) : register(u1); CBUFFER_START(Params) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 20ef3fd..2674a0c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -13,7 +13,7 @@ TEXTURE2D_X(InputOpaqueColor) : register(t0); TEXTURE2D_X(InputColor) : register(t1); TYPED_TEXTURE2D_X(float, InputDepth) : register(t2); TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); -RW_TEXTURE2D_X(float4, MotionDepthAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); CBUFFER_START(Params) @@ -91,7 +91,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) } ////////////compute luma - float3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; + half3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz; ///simple tonemap Colorrgb /= max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + h0; @@ -106,14 +106,14 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) uint y11 = uint(Colorycocg.y * 2047.5); uint z10 = uint(Colorycocg.z * 1023.5); - float3 Colorprergb = LOAD_TEXTURE2D_X(InputOpaqueColor, InputPos).xyz; + half3 Colorprergb = LOAD_TEXTURE2D_X(InputOpaqueColor, InputPos).xyz; ///simple tonemap 
Colorprergb /= max(max(Colorprergb.x, Colorprergb.y), Colorprergb.z) + h0; - float3 delta = abs(Colorrgb - Colorprergb); - float alpha_mask = max(delta.x, max(delta.y, delta.z)); + half3 delta = abs(Colorrgb - Colorprergb); + half alpha_mask = max(delta.x, max(delta.y, delta.z)); alpha_mask = (0.35f * 1000.0f) * alpha_mask; YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; - MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, NearestZ, alpha_mask); + MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = half4(motion, NearestZ, alpha_mask); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index 2417c57..4bc64f4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -33,8 +33,8 @@ float3 DecodeColor(uint sample32) TEXTURE2D_X(PrevHistoryOutput) : register(t0); TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); -RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); +RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); CBUFFER_START(Params) uint2 renderSize; From c43c8d11f9e92cba2ebf15e21242b6152a41a5fb Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 15:08:24 +0100 Subject: [PATCH 08/88] Reworked SGSR2 classes, moving more logic to the base class, and implemented clip-space delta matrix as well as the is-camera-still logic. 
--- .../Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 5 ++ .../Effects/Upscaling/SGSR2Upscaler.cs | 65 ++++++++++++++++++- .../Upscaling/SGSR2Upscaler_2PassCS.cs | 37 +---------- .../Upscaling/SGSR2Upscaler_3PassCS.cs | 37 +---------- 4 files changed, 75 insertions(+), 69 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index d719336..8fd8c84 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -10,15 +10,20 @@ public static class SGSR2 { public Vector2Int renderSize; public Vector2Int displaySize; + public Vector2 renderSizeRcp; public Vector2 displaySizeRcp; + public Vector2 jitterOffset; public Vector2 padding1; + public Matrix4x4 clipToPrevClip; + public float preExposure; public float cameraFovAngleHor; public float cameraNear; public float minLerpContribution; + public uint bSameCamera; public uint reset; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs index 615491e..b8dad18 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs @@ -8,6 +8,8 @@ namespace UnityEngine.Rendering.PostProcessing { public static bool IsSupported => SystemInfo.supportsComputeShaders; + protected abstract string VariantName { get; } + protected RenderTexture _colorLuma; protected RenderTexture _motionDepthAlpha; protected RenderTexture _motionDepthClipAlpha; @@ -17,7 +19,7 @@ namespace UnityEngine.Rendering.PostProcessing protected readonly 
ConstantsBuffer _paramsBuffer = new(); protected uint _frameCount = 0; - + public override void CreateContext(PostProcessRenderContext context, Upscaling config) { CreateRenderTexture(ref _colorLuma, "ColorLuma", config.MaxRenderSize, GraphicsFormat.R32_UInt, true); @@ -41,5 +43,66 @@ namespace UnityEngine.Rendering.PostProcessing DestroyRenderTexture(ref _motionDepthAlpha); DestroyRenderTexture(ref _colorLuma); } + + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample(VariantName); + + Matrix4x4 clipToPrevClip = Matrix4x4.identity; + bool isCameraStill = false; + if (_frameCount > 0 && !config.Reset) + { + // We need to use the projection matrix as it is used on the GPU to match what Unity keeps in Camera.previousViewProjectionMatrix + Matrix4x4 viewProj = GL.GetGPUProjectionMatrix(context.camera.nonJitteredProjectionMatrix, true) * context.camera.worldToCameraMatrix; + clipToPrevClip = context.camera.previousViewProjectionMatrix * viewProj.inverse; + isCameraStill = IsCameraStill(viewProj, context.camera.previousViewProjectionMatrix); + } + + ref var parms = ref _paramsBuffer.Value; + parms.renderSize = config.GetScaledRenderSize(context.camera); + parms.displaySize = config.UpscaleSize; + parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); + parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); + parms.jitterOffset = config.JitterOffset; + parms.clipToPrevClip = clipToPrevClip; + parms.preExposure = config.preExposure; + parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; + parms.cameraNear = context.camera.nearClipPlane; + parms.minLerpContribution = 0f; + parms.bSameCamera = isCameraStill ? 1u : 0u; + parms.reset = config.Reset ? 
1u : 0u; + _paramsBuffer.UpdateBufferData(cmd); + + if (_frameCount == 0 || config.Reset) + { + cmd.SetRenderTarget(_lumaHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_lumaHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[0]); + cmd.ClearRenderTarget(false, true, Color.clear); + cmd.SetRenderTarget(_upscaleHistory[1]); + cmd.ClearRenderTarget(false, true, Color.clear); + } + + DoRender(cmd, context, config); + + cmd.EndSample(VariantName); + _frameCount++; + } + + protected abstract void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config); + + private static bool IsCameraStill(in Matrix4x4 currViewProj, in Matrix4x4 prevViewProj, float threshold = 1e-5f) + { + float vpDiff = 0f; + for (int i = 0; i < 16; i++) + { + vpDiff += Mathf.Abs(currViewProj[i] - prevViewProj[i]); + } + + return vpDiff < threshold; + } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs index c2a7600..a5666e1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs @@ -4,43 +4,12 @@ namespace UnityEngine.Rendering.PostProcessing { internal class SGSR2Upscaler_2PassCS: SGSR2Upscaler { - public override void Render(PostProcessRenderContext context, Upscaling config) - { - var cmd = context.command; - cmd.BeginSample("SGSR2 2-Pass CS"); - - ref var parms = ref _paramsBuffer.Value; - parms.renderSize = config.GetScaledRenderSize(context.camera); - parms.displaySize = config.UpscaleSize; - parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); - parms.displaySizeRcp = new Vector2(1.0f / 
parms.displaySize.x, 1.0f / parms.displaySize.y); - parms.jitterOffset = config.JitterOffset; - parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) - parms.preExposure = config.preExposure; - parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; - parms.cameraNear = context.camera.nearClipPlane; - parms.minLerpContribution = 0f; - parms.bSameCamera = 0u; - parms.reset = config.Reset ? 1u : 0u; - _paramsBuffer.UpdateBufferData(cmd); - - if (_frameCount == 0 || config.Reset) - { - cmd.SetRenderTarget(_lumaHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_lumaHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - } + protected override string VariantName => "SGSR2 2-Pass CS"; + protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) + { Convert(cmd, context, config); Upscale(cmd, context); - - cmd.EndSample("SGSR2 2-Pass CS"); - _frameCount++; } private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs index a41aac7..a32e2e3 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs @@ -4,44 +4,13 @@ namespace UnityEngine.Rendering.PostProcessing { internal class SGSR2Upscaler_3PassCS: SGSR2Upscaler { - public override void 
Render(PostProcessRenderContext context, Upscaling config) + protected override string VariantName => "SGSR2 3-Pass CS"; + + protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) { - var cmd = context.command; - cmd.BeginSample("SGSR2 3-Pass CS"); - - ref var parms = ref _paramsBuffer.Value; - parms.renderSize = config.GetScaledRenderSize(context.camera); - parms.displaySize = config.UpscaleSize; - parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y); - parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y); - parms.jitterOffset = config.JitterOffset; - parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip => (previous_view_proj * inv_vp) - parms.preExposure = config.preExposure; - parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; - parms.cameraNear = context.camera.nearClipPlane; - parms.minLerpContribution = 0f; - parms.bSameCamera = 0u; - parms.reset = config.Reset ? 
1u : 0u; - _paramsBuffer.UpdateBufferData(cmd); - - if (_frameCount == 0 || config.Reset) - { - cmd.SetRenderTarget(_lumaHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_lumaHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[0]); - cmd.ClearRenderTarget(false, true, Color.clear); - cmd.SetRenderTarget(_upscaleHistory[1]); - cmd.ClearRenderTarget(false, true, Color.clear); - } - Convert(cmd, context, config); Activate(cmd, context); Upscale(cmd, context); - - cmd.EndSample("SGSR2 3-Pass CS"); - _frameCount++; } private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) From 3285c552f408738f89969238896502cfeb8b98cc Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 15:32:52 +0100 Subject: [PATCH 09/88] Verified motion vector "decoding" against the auto-generated camera MVs based on the clip-space delta matrix, and made a small fix --- .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl index c1e5d71..9c7711f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl @@ -21,6 +21,6 @@ SamplerState s_LinearClamp : register(s1); inline float2 decodeVelocityFromTexture(float2 ev) { - // Nothing to do, motion vectors are not encoded - return ev; + // Nothing much to do, motion vectors are not encoded + return -ev; } From 0469bb921fbd4cc34cd9c69fc5ec07642fd6ee67 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 16:37:09 +0100 Subject: [PATCH 
10/88] Reverted use of CBUFFER_ macros, as it was breaking the shaders on Vulkan --- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute | 5 +++-- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute | 5 +++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute | 5 +++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute | 5 +++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute | 5 +++-- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index a41837d..582c930 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -15,7 +15,8 @@ TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2); RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -CBUFFER_START(Params) +cbuffer Params : register(b0) +{ uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -29,7 +30,7 @@ CBUFFER_START(Params) float MinLerpContribution; uint bSameCamera; uint reset; -CBUFFER_END +}; [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index 163af87..c3a675e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -36,7 +36,8 @@ TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); -CBUFFER_START(Params) +cbuffer Params : register(b0) +{ uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -50,7 +51,7 @@ CBUFFER_START(Params) float MinLerpContribution; uint bSameCamera; uint reset; -CBUFFER_END +}; [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 2304fbd..380f122 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -34,7 +34,8 @@ TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, LumaHistory) : register(u1); -CBUFFER_START(Params) +cbuffer Params : register(b0) +{ uint2 renderSize; uint2 displaySize; float2 ViewportSizeInverse; @@ -48,7 +49,7 @@ CBUFFER_START(Params) float MinLerpContribution; uint bSameCamera; uint reset; -CBUFFER_END +}; [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 2674a0c..3038414 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -16,7 +16,8 @@ TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); RW_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -CBUFFER_START(Params) +cbuffer Params : register(b0) +{ uint2 renderSize; uint2 displaySize; float2 ViewportSizeInverse; @@ -30,7 +31,7 @@ CBUFFER_START(Params) float MinLerpContribution; uint bSameCamera; uint reset; -CBUFFER_END +}; [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index 4bc64f4..fbc1fed 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -36,7 +36,8 @@ TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); -CBUFFER_START(Params) +cbuffer Params : register(b0) +{ uint2 renderSize; uint2 displaySize; float2 renderSizeRcp; @@ -50,7 +51,7 @@ CBUFFER_START(Params) float MinLerpContribution; uint bSameCamera; uint reset; -CBUFFER_END +}; [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) From 8d6036c91ca250c2d93f26eba1c9d84fd6759580 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 16:48:15 +0100 Subject: [PATCH 11/88] Moved a bunch of common 
definitions and functions into a separate file --- .../Shaders/2_pass_cs/sgsr2_convert.compute | 19 +------ .../Shaders/2_pass_cs/sgsr2_upscale.compute | 40 +------------- .../Shaders/3_pass_cs/sgsr2_activate.compute | 41 +------------- .../Shaders/3_pass_cs/sgsr2_convert.compute | 26 ++------- .../Shaders/3_pass_cs/sgsr2_upscale.compute | 40 +------------- .../Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 55 +++++++++++++++++++ .../SGSR2/Shaders/sgsr2_common.hlsl.meta | 3 + 7 files changed, 69 insertions(+), 155 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index 582c930..f5a64cb 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -1,5 +1,6 @@ #pragma kernel CS #include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" //============================================================================================================ // @@ -15,23 +16,6 @@ TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2); RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -cbuffer Params : register(b0) -{ - uint2 renderSize; - uint2 displaySize; - float2 renderSizeRcp; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float 
cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; -}; - [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { @@ -72,7 +56,6 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length; float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC); - float EPSILON = 1.19e-07f; Wdepth += clamp((Depthsep / (abs(maxC - topleft4) + EPSILON)), 0.0, 1.0); Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0); Wdepth += clamp((Depthsep / (abs(maxC - bottomLeft4) + EPSILON)), 0.0, 1.0); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index c3a675e..b84cf7f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -1,5 +1,6 @@ #pragma kernel CS #include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" //============================================================================================================ // @@ -9,50 +10,12 @@ // //============================================================================================================ -float FastLanczos(float base) -{ - float y = base - 1.0f; - float y2 = y * y; - float y_temp = 0.75f * y + y2; - return y_temp * y2; -} - -float3 DecodeColor(uint sample32) -{ - uint x11 = sample32 >> 21u; - uint y11 = sample32 & (2047u << 10u); - uint z10 = sample32 & 1023u; - float3 samplecolor; - samplecolor.x = (float(x11) * (1.0 / 2047.5)); - samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; - samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 
0.5; - - return samplecolor; -} - TEXTURE2D_X(PrevHistoryOutput) : register(t0); TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); -cbuffer Params : register(b0) -{ - uint2 renderSize; - uint2 displaySize; - float2 renderSizeRcp; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; -}; - [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { @@ -290,7 +253,6 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); float basealpha = baseupdate; - const float EPSILON = 1.192e-07f; float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); float boxsize = lerp(scalefactor, 1.0f, boxscale); float3 sboxvar = rectboxvar * boxsize; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 380f122..240b40c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -1,7 +1,6 @@ #pragma kernel CS #include "../sgsr2_birp.hlsl" - -// TODO: what about REQUEST_NDC_Y_UP? Might be graphics API-dependent, look at Unity's shader includes. 
+#include "../sgsr2_common.hlsl" //============================================================================================================ // @@ -11,46 +10,12 @@ // //============================================================================================================ -#define EPSILON 1.19e-07f -float DecodeColorY(uint sample32) -{ - uint x11 = sample32 >> 21u; - return float(x11) * (1.0 / 2047.5); -} - -uint packHalf2x16(float2 value) -{ - return f32tof16(value.x) | (f32tof16(value.y) << 16); -} - -float2 unpackHalf2x16(uint x) -{ - return f16tof32(uint2(x & 0xFFFF, x >> 16)); -} - TYPED_TEXTURE2D_X(uint, PrevLumaHistory) : register(t0); TYPED_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, LumaHistory) : register(u1); -cbuffer Params : register(b0) -{ - uint2 renderSize; - uint2 displaySize; - float2 ViewportSizeInverse; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; -}; - [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { @@ -65,8 +30,8 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) uint2 InputPos = gl_GlobalInvocationID.xy; - float2 ViewportUV = (float2(gl_GlobalInvocationID.xy) + 0.5f) * ViewportSizeInverse; - float2 gatherCoord = ViewportUV + 0.5f * ViewportSizeInverse; + float2 ViewportUV = (float2(gl_GlobalInvocationID.xy) + 0.5f) * renderSizeRcp; + float2 gatherCoord = ViewportUV + 0.5f * renderSizeRcp; uint luma_reference32 = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord).w; float luma_reference = DecodeColorY(luma_reference32); diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 3038414..395af7f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -1,5 +1,6 @@ #pragma kernel CS #include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" //============================================================================================================ // @@ -16,23 +17,6 @@ TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); RW_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); -cbuffer Params : register(b0) -{ - uint2 renderSize; - uint2 displaySize; - float2 ViewportSizeInverse; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; -}; - [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { @@ -41,8 +25,8 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) half h0 = preExposure; uint2 InputPos = gl_GlobalInvocationID.xy; - float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * ViewportSizeInverse; - float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; + float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * renderSizeRcp; + float2 ViewportUV = gatherCoord + 0.5f * renderSizeRcp; //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h //FindNearestDepth @@ -57,13 +41,13 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) NearestZ = max(topleft.z, NearestZ); 
NearestZ = max(topleft.w, NearestZ); - float2 v11 = float2(ViewportSizeInverse.x, 0.0); + float2 v11 = float2(renderSizeRcp.x, 0.0); float2 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v11)).yz; NearestZ = max(topRight.x, NearestZ); NearestZ = max(topRight.y, NearestZ); - float2 v13 = float2(0.0, ViewportSizeInverse.y); + float2 v13 = float2(0.0, renderSizeRcp.y); float2 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v13)).xy; NearestZ = max(bottomLeft.x, NearestZ); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index fbc1fed..8c87124 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -1,5 +1,6 @@ #pragma kernel CS #include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" //============================================================================================================ // @@ -9,50 +10,12 @@ // //============================================================================================================ -float FastLanczos(float base) -{ - float y = base - 1.0f; - float y2 = y * y; - float y_temp = 0.75f * y + y2; - return y_temp * y2; -} - -float3 DecodeColor(uint sample32) -{ - uint x11 = sample32 >> 21u; - uint y11 = sample32 & (2047u << 10u); - uint z10 = sample32 & 1023u; - float3 samplecolor; - samplecolor.x = (float(x11) * (1.0 / 2047.5)); - samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; - samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5; - - return samplecolor; -} - TEXTURE2D_X(PrevHistoryOutput) : register(t0); 
TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); -cbuffer Params : register(b0) -{ - uint2 renderSize; - uint2 displaySize; - float2 renderSizeRcp; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; -}; - [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) { @@ -277,7 +240,6 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); float basealpha = baseupdate; - const float EPSILON = 1.192e-07f; float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); float boxsize = lerp(scalefactor, 1.0f, boxscale); float3 sboxvar = rectboxvar * boxsize; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl new file mode 100644 index 0000000..27f282e --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -0,0 +1,55 @@ +#define EPSILON 1.192e-07f + +cbuffer Params : register(b0) +{ + uint2 renderSize; + uint2 displaySize; + float2 renderSizeRcp; + float2 displaySizeRcp; + float2 jitterOffset; + float2 padding1; + float4 clipToPrevClip[4]; + float preExposure; + float cameraFovAngleHor; + float cameraNear; + float MinLerpContribution; + uint bSameCamera; + uint reset; +}; + +float FastLanczos(float base) +{ + float y = base - 1.0f; + float y2 = y * y; + float y_temp = 0.75f * y + y2; + return y_temp * y2; +} + +float3 
DecodeColor(uint sample32) +{ + uint x11 = sample32 >> 21u; + uint y11 = sample32 & (2047u << 10u); + uint z10 = sample32 & 1023u; + float3 samplecolor; + samplecolor.x = (float(x11) * (1.0 / 2047.5)); + samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5; + samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5; + + return samplecolor; +} + +float DecodeColorY(uint sample32) +{ + uint x11 = sample32 >> 21u; + return float(x11) * (1.0 / 2047.5); +} + +uint packHalf2x16(float2 value) +{ + return f32tof16(value.x) | (f32tof16(value.y) << 16); +} + +float2 unpackHalf2x16(uint x) +{ + return f16tof32(uint2(x & 0xFFFF, x >> 16)); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta new file mode 100644 index 0000000..d3bb9ed --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: bff676b8468748078a48f9d10bb7eabd +timeCreated: 1734795662 \ No newline at end of file From 8635b131f6859d42db62410429f4c5746de91ae6 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 16:53:26 +0100 Subject: [PATCH 12/88] Renamed SGSR2 compute shader resources class, to allow an easier distinction between fragment and compute variants later on --- .../PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 2 +- .../PostProcessing/Runtime/PostProcessResources.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index 8fd8c84..763e1cd 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -29,7 +29,7 @@ public static class SGSR2 } [Serializable] - public class Shaders + public class ComputeShaders { public TwoPassCompute twoPassCompute; public ThreePassCompute threePassCompute; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs index df7ee16..ce92f00 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs @@ -234,7 +234,7 @@ namespace UnityEngine.Rendering.PostProcessing /// /// Compute shaders used by the SnapDragon Game Super Resolution 2 (SGSR2) Upscaler. /// - public SGSR2.Shaders sgsr2Upscaler; + public SGSR2.ComputeShaders sgsr2Upscaler; /// /// Returns a copy of this class and its content. From 19b2969e580ffc3f44580e645963537a314204f7 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 17:06:07 +0100 Subject: [PATCH 13/88] Added macros to change nearest-depth and depth-clipping logic based on whether the Unity graphics API uses reversed Z or not --- .../Shaders/2_pass_cs/sgsr2_convert.compute | 14 +++++++------- .../Shaders/3_pass_cs/sgsr2_activate.compute | 2 +- .../Shaders/3_pass_cs/sgsr2_convert.compute | 16 ++++++++-------- .../Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 8 ++++++++ 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index f5a64cb..aa214fe 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute 
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -38,16 +38,16 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float4 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v12)); float2 v14 = float2(ViewportSizeInverse.x*2.0, ViewportSizeInverse.y*2.0); float4 bottomRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v14)); - float maxC = max(max(max(topleft.y,topRight.x),bottomLeft.z),bottomRight.w); - float topleft4 = max(max(max(topleft.y,topleft.x),topleft.z),topleft.w); - float topLeftMax9 = max(bottomLeft.w,max(max(maxC,topleft4),topRight.w)); + float maxC = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topleft.y,topRight.x),bottomLeft.z),bottomRight.w); + float topleft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topleft.y,topleft.x),topleft.z),topleft.w); + float topLeftMax9 = DEPTH_NEAREST(bottomLeft.w,DEPTH_NEAREST(DEPTH_NEAREST(maxC,topleft4),topRight.w)); float depthclip = 0.0; - if (maxC > 1.0e-05f) + if (DEPTH_CLIP(maxC)) { - float topRight4 = max(max(max(topRight.y,topRight.x),topRight.z),topRight.w); - float bottomLeft4 = max(max(max(bottomLeft.y,bottomLeft.x),bottomLeft.z),bottomLeft.w); - float bottomRight4 = max(max(max(bottomRight.y,bottomRight.x),bottomRight.z),bottomRight.w); + float topRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topRight.y,topRight.x),topRight.z),topRight.w); + float bottomLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(bottomLeft.y,bottomLeft.x),bottomLeft.z),bottomLeft.w); + float bottomRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(bottomRight.y,bottomRight.x),bottomRight.z),bottomRight.w); float Wdepth = 0.0; float Ksep = 1.37e-05f; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 240b40c..7527649 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -47,7 +47,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) #endif float depthclip = 0.0; - if (depth > 1.0e-05f) { + if (DEPTH_CLIP(depth)) { float2 Prevf_sample = PrevUV * float2(renderSize) - 0.5f; float2 Prevfrac = Prevf_sample - floor(Prevf_sample); float OneMinusPrevfacx = 1.0 - Prevfrac.x; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 395af7f..33214e1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -36,22 +36,22 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float4 topleft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord); - NearestZ = max(topleft.x, NearestZ); - NearestZ = max(topleft.y, NearestZ); - NearestZ = max(topleft.z, NearestZ); - NearestZ = max(topleft.w, NearestZ); + NearestZ = DEPTH_NEAREST(topleft.x, NearestZ); + NearestZ = DEPTH_NEAREST(topleft.y, NearestZ); + NearestZ = DEPTH_NEAREST(topleft.z, NearestZ); + NearestZ = DEPTH_NEAREST(topleft.w, NearestZ); float2 v11 = float2(renderSizeRcp.x, 0.0); float2 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v11)).yz; - NearestZ = max(topRight.x, NearestZ); - 
NearestZ = max(topRight.y, NearestZ); + NearestZ = DEPTH_NEAREST(topRight.x, NearestZ); + NearestZ = DEPTH_NEAREST(topRight.y, NearestZ); float2 v13 = float2(0.0, renderSizeRcp.y); float2 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v13)).xy; - NearestZ = max(bottomLeft.x, NearestZ); - NearestZ = max(bottomLeft.y, NearestZ); + NearestZ = DEPTH_NEAREST(bottomLeft.x, NearestZ); + NearestZ = DEPTH_NEAREST(bottomLeft.y, NearestZ); //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl index 27f282e..56a5d1e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -1,5 +1,13 @@ #define EPSILON 1.192e-07f +#ifdef UNITY_REVERSED_Z +#define DEPTH_NEAREST(a, b) max((a), (b)) +#define DEPTH_CLIP(depth) ((depth) > 1.0e-05f) +#else +#define DEPTH_NEAREST(a, b) min((a), (b)) +#define DEPTH_CLIP(depth) ((depth) < 1.0f - 1.0e-05f) +#endif + cbuffer Params : register(b0) { uint2 renderSize; From 5b203e7ef5ffc4b191f044d098af19cc32c026f1 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 21:01:13 +0100 Subject: [PATCH 14/88] Set normalized device coordinates Y direction to be up, which is standard in Unity, and properly detect non-zero motion vector inputs. This *seems* to be the correct recipe. 
--- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute | 2 +- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute | 2 +- .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index aa214fe..5d4711e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -68,7 +68,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos); float2 motion; - if (EncodedVelocity.x > 0.0) + if (any(abs(EncodedVelocity) > 0.0)) { motion = decodeVelocityFromTexture(EncodedVelocity.xy); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index 33214e1..e049225 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -58,7 +58,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos); float2 motion; - if (EncodedVelocity.x > 0.0) + if (any(abs(EncodedVelocity) > 0.0)) { motion = decodeVelocityFromTexture(EncodedVelocity.xy); } diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl index 56a5d1e..c79d47d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -1,5 +1,7 @@ #define EPSILON 1.192e-07f +#define REQUEST_NDC_Y_UP + #ifdef UNITY_REVERSED_Z #define DEPTH_NEAREST(a, b) max((a), (b)) #define DEPTH_CLIP(depth) ((depth) > 1.0e-05f) From 80a8532cb4f41e8b7e312ac6ab5512ff44b1d5d2 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 21:07:50 +0100 Subject: [PATCH 15/88] Cleaned up an unused argument --- .../Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs index a5666e1..7e4dfbe 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs @@ -8,11 +8,11 @@ namespace UnityEngine.Rendering.PostProcessing protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) { - Convert(cmd, context, config); + Convert(cmd, context); Upscale(cmd, context); } - private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) + private void Convert(CommandBuffer cmd, PostProcessRenderContext context) { var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.convert; int kernelIndex = 
shader.FindKernel("CS"); From 4f8cf086bf1a9c66077753ee70c6050759ad3f2c Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 21 Dec 2024 21:12:57 +0100 Subject: [PATCH 16/88] More specifically, set NDC Y-coordinate to be up when the Unity graphics API has UV's starting at the top --- .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl index c79d47d..1a8902e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -1,8 +1,10 @@ #define EPSILON 1.192e-07f +#if UNITY_UV_STARTS_AT_TOP #define REQUEST_NDC_Y_UP +#endif -#ifdef UNITY_REVERSED_Z +#if UNITY_REVERSED_Z #define DEPTH_NEAREST(a, b) max((a), (b)) #define DEPTH_CLIP(depth) ((depth) > 1.0e-05f) #else From ed566a8beb8a8e16a04d9adfeb9e4794072af161 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 22 Dec 2024 12:58:30 +0100 Subject: [PATCH 17/88] Empirically determined the best setup for motion vectors in OpenGL, though it still doesn't make a whole lot of sense. Added opt-in debug symbols line to the upscale shaders to make analysis easier. 
--- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute | 1 + .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute | 1 + .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index b84cf7f..a73bdf1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -1,4 +1,5 @@ #pragma kernel CS +//#pragma enable_d3d11_debug_symbols #include "../sgsr2_birp.hlsl" #include "../sgsr2_common.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index 8c87124..d96307d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -1,4 +1,5 @@ #pragma kernel CS +//#pragma enable_d3d11_debug_symbols #include "../sgsr2_birp.hlsl" #include "../sgsr2_common.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl index 9c7711f..19823cd 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl @@ -22,5 +22,9 @@ SamplerState s_LinearClamp : register(s1); inline float2 decodeVelocityFromTexture(float2 ev) { // Nothing much to do, motion vectors are not encoded +#if UNITY_UV_STARTS_AT_TOP return -ev; +#else + return ev; +#endif } From d354d07d818cfa5da319365b31d401444a167d97 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 22 Dec 2024 13:24:02 +0100 Subject: [PATCH 18/88] Added scaleRatio to unify the cbuffer definition for all shaders, including the fragment shader variant. --- .../Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 3 ++- .../Shaders/2_pass_cs/sgsr2_upscale.compute | 2 +- .../Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 27 ++++++++++--------- .../Effects/Upscaling/SGSR2Upscaler.cs | 3 ++- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index 763e1cd..8100bac 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -23,7 +23,8 @@ public static class SGSR2 public float cameraFovAngleHor; public float cameraNear; public float minLerpContribution; - + + public Vector2 scaleRatio; public uint bSameCamera; public uint reset; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index a73bdf1..11d11c2 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -263,7 +263,7 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) rectboxmin = max(rectboxmin, boxmin); float3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); - float startLerpValue = MinLerpContribution; //MinLerpContribution; //MinLerpContribution; + float startLerpValue = minLerpContribution; //MinLerpContribution; //MinLerpContribution; if ((abs(mda.x) + abs(mda.y)) > 0.000001) startLerpValue = 0.0; float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? startLerpValue : 1.0f; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl index 1a8902e..befb867 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -14,19 +14,20 @@ cbuffer Params : register(b0) { - uint2 renderSize; - uint2 displaySize; - float2 renderSizeRcp; - float2 displaySizeRcp; - float2 jitterOffset; - float2 padding1; - float4 clipToPrevClip[4]; - float preExposure; - float cameraFovAngleHor; - float cameraNear; - float MinLerpContribution; - uint bSameCamera; - uint reset; + uint2 renderSize; + uint2 displaySize; + float2 renderSizeRcp; + float2 displaySizeRcp; + float2 jitterOffset; + float2 padding1; + float4 clipToPrevClip[4]; + float preExposure; + float cameraFovAngleHor; + float cameraNear; + float minLerpContribution; + float2 scaleRatio; + uint bSameCamera; + uint reset; }; float 
FastLanczos(float base) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs index b8dad18..c386d59 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs @@ -67,9 +67,10 @@ namespace UnityEngine.Rendering.PostProcessing parms.jitterOffset = config.JitterOffset; parms.clipToPrevClip = clipToPrevClip; parms.preExposure = config.preExposure; - parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y; + parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * parms.renderSize.x * parms.renderSizeRcp.y; parms.cameraNear = context.camera.nearClipPlane; parms.minLerpContribution = 0f; + parms.scaleRatio = new Vector2(parms.renderSize.x * parms.displaySizeRcp.x, parms.renderSize.y * parms.displaySizeRcp.y); parms.bSameCamera = isCameraStill ? 1u : 0u; parms.reset = config.Reset ? 
1u : 0u; _paramsBuffer.UpdateBufferData(cmd); From 05989857c0dc54b326b8f05005eb1fa12a7e8a52 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 22 Dec 2024 13:42:14 +0100 Subject: [PATCH 19/88] Added an untested and as of yet unused HDRP include file, just as an expression of what this file will look like for SRPs --- .../Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl | 18 ++++++++++++++++++ .../SGSR2/Shaders/sgsr2_hdrp.hlsl.meta | 3 +++ 2 files changed, 21 insertions(+) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl new file mode 100644 index 0000000..e0531ab --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl @@ -0,0 +1,18 @@ +#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl" +#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl" + +// Using renderSizeRcp here is a bit of a hack, but the SRPs don't offer any macros for gather with offset, and we know which texture the GatherBlue will be used for +#define GATHER_BLUE_TEXTURE2D_X_OFFSET(textureName, samplerName, coord2, offset) GATHER_BLUE_TEXTURE2D_X(textureName, samplerName, coord2 + offset * renderSizeRcp) + +#define S_POINT_CLAMP s_point_clamp_sampler +#define S_LINEAR_CLAMP s_linear_clamp_sampler + +inline float2 decodeVelocityFromTexture(float2 ev) +{ + // Nothing much to do, motion vectors are not encoded +#if UNITY_UV_STARTS_AT_TOP + return -ev; +#else + return ev; +#endif +} diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta new file mode 100644 index 0000000..82f81ea --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: ae8ec449c111471fb8eecbd6142cb9ad +timeCreated: 1734870531 \ No newline at end of file From a56f0742b5d22e78ffae33a80fc9a6a3919dc47b Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 22 Dec 2024 16:30:50 +0100 Subject: [PATCH 20/88] Further tweaked motion vector processing, now the camera motion vectors from Unity produce an exact match with the auto-generated motion from the clip-delta matrix, in both D3D, Vulkan and OpenGL. It also makes sense with regards to the flipped Y-coordinate in screen space now. The final result looks good now too. 
--- .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl | 5 ++--- .../Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl index 19823cd..be757a0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl @@ -21,10 +21,9 @@ SamplerState s_LinearClamp : register(s1); inline float2 decodeVelocityFromTexture(float2 ev) { - // Nothing much to do, motion vectors are not encoded #if UNITY_UV_STARTS_AT_TOP - return -ev; + return float2(ev.x, -ev.y) * 2.0f; #else - return ev; + return ev * 2.0f; #endif } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl index e0531ab..6133399 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl @@ -9,10 +9,9 @@ inline float2 decodeVelocityFromTexture(float2 ev) { - // Nothing much to do, motion vectors are not encoded #if UNITY_UV_STARTS_AT_TOP - return -ev; + return float2(ev.x, -ev.y) * 2.0f; #else - return ev; + return ev * 2.0f; #endif } From a0fa2c9e60fc5c354d40eb688b78d6464fc8c802 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 23 Dec 2024 16:10:42 +0100 Subject: [PATCH 21/88] First import of GLSL fragment shaders plus some initial Unity shader setup --- 
.../Upscaling/SGSR2/Shaders/2_pass_fs.meta | 8 + .../SGSR2/Shaders/2_pass_fs/sgsr2.shader | 51 ++++ .../SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta | 9 + .../Shaders/2_pass_fs/sgsr2_convert.hlsl | 116 +++++++ .../Shaders/2_pass_fs/sgsr2_convert.hlsl.meta | 7 + .../Shaders/2_pass_fs/sgsr2_upscale.hlsl | 286 ++++++++++++++++++ .../Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta | 7 + .../SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs | 21 ++ .../Shaders/2_pass_fs/sgsr2_vertex.vs.meta | 7 + 9 files changed, 512 insertions(+) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta new file mode 100644 index 0000000..7344082 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 097742e23f344d0408435f99f89e1edb +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader new file mode 100644 index 0000000..f6c9e25 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader @@ -0,0 +1,51 @@ +Shader "TND/sgsr2_2pass_fs" +{ + Properties + { + _MainTex ("Texture", 2D) = "white" {} + } + SubShader + { + Cull Off ZWrite Off ZTest Always + + Pass // Convert + { + CGPROGRAM + #pragma vertex vert_img + #pragma fragment frag + + #include "UnityCG.cginc" + + sampler2D _MainTex; + + fixed4 frag (v2f_img i) : SV_Target + { + fixed4 col = tex2D(_MainTex, i.uv); + // just invert the colors + col.rgb = 1 - col.rgb; + return col; + } + ENDCG + } + + Pass // Upscale + { + CGPROGRAM + #pragma vertex vert_img + #pragma fragment frag + + #include "UnityCG.cginc" + + sampler2D _MainTex; + + fixed4 frag (v2f_img i) : SV_Target + { + fixed4 col = tex2D(_MainTex, i.uv); + // just invert the colors + col.rgb = 1 - col.rgb; + return col; + } + ENDCG + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta new file mode 100644 index 0000000..435c493 --- 
/dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 9e367486dadedbc4da8313a481aa8a27 +ShaderImporter: + externalObjects: {} + defaultTextures: [] + nonModifiableTextures: [] + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl new file mode 100644 index 0000000..dbbfbf1 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl @@ -0,0 +1,116 @@ +#include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" + +//============================================================================================================ +// +// +// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// +//============================================================================================================ + +precision highp float; +precision highp int; + +layout(location = 0) out vec4 MotionDepthClipAlphaBuffer; +layout(location = 0) in highp vec2 texCoord; + +layout(set = 0, binding = 1) uniform mediump sampler2D InputDepth; +layout(set = 0, binding = 2) uniform mediump sampler2D InputVelocity; + +layout(std140, set = 0, binding = 0) uniform Params +{ + vec4 clipToPrevClip[4]; + vec2 renderSize; + vec2 outputSize; + vec2 renderSizeRcp; + vec2 outputSizeRcp; + vec2 jitterOffset; + vec2 scaleRatio; + float cameraFovAngleHor; + float minLerpContribution; + float reset; + uint bSameCamera; +} params; + +vec2 decodeVelocityFromTexture(vec2 ev) { + const float inv_div = 1.0f / (0.499f * 0.5f); + vec2 dv; + dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div; + //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f))); + return dv; +} + +void main() +{ + uvec2 InputPos = uvec2(texCoord * params.renderSize); + vec2 gatherCoord = texCoord - vec2(0.5) * params.renderSizeRcp; + + + // texture gather to find nearest depth + // a b c d + // e f g h + // i j k l + // m n o p + //btmLeft mnji + //btmRight oplk + //topLeft efba + //topRight ghdc + + vec4 btmLeft = textureGather(InputDepth, gatherCoord, 0); + vec2 v10 = vec2(params.renderSizeRcp.x * 2.0f, 0.0); + vec4 btmRight = textureGather(InputDepth,(gatherCoord+v10), 0); + vec2 v12 = vec2(0.0, params.renderSizeRcp.y * 2.0f); + vec4 topLeft = textureGather(InputDepth,(gatherCoord+v12), 0); + vec2 v14 = vec2(params.renderSizeRcp.x * 2.0f, params.renderSizeRcp.y * 2.0f); + vec4 topRight = textureGather(InputDepth,(gatherCoord+v14), 0); + float maxC = min(min(min(btmLeft.z,btmRight.w),topLeft.y),topRight.x); + float btmLeft4 = min(min(min(btmLeft.y,btmLeft.x),btmLeft.z),btmLeft.w); + float btmLeftMax9 = 
min(topLeft.x,min(min(maxC,btmLeft4),btmRight.x)); + + float depthclip = 0.0; + if (maxC < 1.0 - 1.0e-05f) + { + float btmRight4 = min(min(min(btmRight.y,btmRight.x),btmRight.z),btmRight.w); + float topLeft4 = min(min(min(topLeft.y,topLeft.x),topLeft.z),topLeft.w); + float topRight4 = min(min(min(topRight.y,topRight.x),topRight.z),topRight.w); + + float Wdepth = 0.0; + float Ksep = 1.37e-05f; + float Kfov = params.cameraFovAngleHor; + float diagonal_length = length(params.renderSize); + float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length; + + float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC); + float EPSILON = 1.19e-07f; + Wdepth += clamp((Depthsep / (abs(maxC - btmLeft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - btmRight4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - topLeft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0); + depthclip = clamp(1.0f - Wdepth * 0.25, 0.0, 1.0); + } + + //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion + + vec4 EncodedVelocity = texelFetch(InputVelocity, ivec2(InputPos), 0); + + vec2 motion; + if (EncodedVelocity.x > 0.0) + { + motion = decodeVelocityFromTexture(EncodedVelocity.xy); + } + else + { +#ifdef REQUEST_NDC_Y_UP + vec2 ScreenPos = vec2(2.0f * texCoord.x - 1.0f, 1.0f - 2.0f * texCoord.y); +#else + vec2 ScreenPos = vec2(2.0f * texCoord - 1.0f); +#endif + vec3 Position = vec3(ScreenPos, btmLeftMax9); //this_clip + vec4 PreClip = params.clipToPrevClip[3] + ((params.clipToPrevClip[2] * Position.z) + ((params.clipToPrevClip[1] * ScreenPos.y) + (params.clipToPrevClip[0] * ScreenPos.x))); + vec2 PreScreen = PreClip.xy / PreClip.w; + motion = Position.xy - PreScreen; + } + MotionDepthClipAlphaBuffer = vec4(motion, depthclip, 0.0); + +} \ No newline at end of file diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta new file mode 100644 index 0000000..89cdb2c --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 3e8c4c408c337364291ae0e57dc25f28 +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl new file mode 100644 index 0000000..d3921a1 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -0,0 +1,286 @@ +#include "../sgsr2_birp.hlsl" +#include "../sgsr2_common.hlsl" + +//============================================================================================================ +// +// +// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// +//============================================================================================================ + +precision mediump float; +precision highp int; + +float FastLanczos(float base) +{ + float y = base - 1.0f; + float y2 = y * y; + float y_temp = 0.75f * y + y2; + return y_temp * y2; +} + +layout(location = 0) out mediump vec4 Output; +layout(location = 0) in highp vec2 texCoord; + +layout(set = 0, binding = 1) uniform mediump sampler2D PrevOutput; +layout(set = 0, binding = 2) uniform mediump sampler2D MotionDepthClipAlphaBuffer; +layout(set = 0, binding = 3) uniform mediump sampler2D InputColor; + +layout(std140, set = 0, binding = 0) uniform readonly Params +{ + highp vec4 clipToPrevClip[4]; + highp vec2 renderSize; + highp vec2 outputSize; + highp vec2 renderSizeRcp; + highp vec2 outputSizeRcp; + highp vec2 jitterOffset; + highp vec2 scaleRatio; + highp float cameraFovAngleHor; + highp float minLerpContribution; + highp float reset; + uint bSameCamera; +} params; + +void main() +{ + float Biasmax_viewportXScale = params.scaleRatio.x; + float scalefactor = params.scaleRatio.y; + + highp vec2 Hruv = texCoord; + + highp vec2 Jitteruv; + Jitteruv.x = clamp(Hruv.x + (params.jitterOffset.x * params.outputSizeRcp.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (params.jitterOffset.y * params.outputSizeRcp.y), 0.0, 1.0); + + highp ivec2 InputPos = ivec2(Jitteruv * params.renderSize); + + highp vec3 mda = textureLod(MotionDepthClipAlphaBuffer, Jitteruv, 0.0).xyz; + highp vec2 Motion = mda.xy; + + highp vec2 PrevUV; + PrevUV.x = clamp(-0.5 * Motion.x + Hruv.x, 0.0, 1.0); +#ifdef REQUEST_NDC_Y_UP + PrevUV.y = clamp(0.5 * Motion.y + Hruv.y, 0.0, 1.0); +#else + PrevUV.y = clamp(-0.5 * Motion.y + Hruv.y, 0.0, 1.0); +#endif + + float depthfactor = mda.z; + + vec3 HistoryColor = textureLod(PrevOutput, PrevUV, 0.0).xyz; + + /////upsample and compute box + vec4 Upsampledcw = vec4(0.0); + float biasmax = 
Biasmax_viewportXScale ; + float biasmin = max(1.0f, 0.3 + 0.3 * biasmax); + float biasfactor = 0.25f * depthfactor; + float kernelbias = mix(biasmax, biasmin, biasfactor); + float motion_viewport_len = length(Motion * params.outputSize); + float curvebias = mix(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0)); + + vec3 rectboxcenter = vec3(0.0); + vec3 rectboxvar = vec3(0.0); + float rectboxweight = 0.0; + highp vec2 srcpos = vec2(InputPos) + vec2(0.5) - params.jitterOffset; + + kernelbias *= 0.5f; + float kernelbias2 = kernelbias * kernelbias; + vec2 srcpos_srcOutputPos = srcpos - Hruv * params.renderSize; //srcOutputPos = Hruv * params.renderSize; + vec3 rectboxmin; + vec3 rectboxmax; + vec3 topMid = texelFetch(InputColor, InputPos + ivec2(0, 1), 0).xyz; + { + + vec3 samplecolor = topMid; + vec2 baseoffset = srcpos_srcOutputPos + vec2(0.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = samplecolor; + rectboxmax = samplecolor; + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + vec3 rightMid = texelFetch(InputColor, InputPos + ivec2(1, 0), 0).xyz; + { + + vec3 samplecolor = rightMid; + vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, 0.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + vec3 
leftMid = texelFetch(InputColor, InputPos + ivec2(-1, 0) , 0).xyz; + { + + vec3 samplecolor = leftMid; + vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, 0.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + vec3 centerMid = texelFetch(InputColor, InputPos + ivec2(0, 0) , 0).xyz; + { + + vec3 samplecolor = centerMid; + vec2 baseoffset = srcpos_srcOutputPos; + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + vec3 btmMid = texelFetch(InputColor, InputPos + ivec2(0, -1) , 0).xyz; + { + + vec3 samplecolor = btmMid; + vec2 baseoffset = srcpos_srcOutputPos + vec2(0.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + + //if 
(params.sameCameraFrmNum!=0u) //maybe disable this for ultra performance + if (false) //maybe disable this for ultra performance, true could generate more realistic output + { + { + vec3 topRight = texelFetch(InputColor, InputPos + ivec2(1, 1), 0).xyz; + vec3 samplecolor = topRight; + vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + vec3 topLeft = texelFetch(InputColor, InputPos + ivec2(-1, 1), 0).xyz; + vec3 samplecolor = topLeft; + vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, 1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + { + vec3 btmRight = texelFetch(InputColor, InputPos + ivec2(1, -1) , 0).xyz; + vec3 samplecolor = btmRight; + vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = 
max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + + { + vec3 btmLeft = texelFetch(InputColor, InputPos + ivec2(-1, -1) , 0).xyz; + vec3 samplecolor = btmLeft; + vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, -1.0); + float baseoffset_dot = dot(baseoffset, baseoffset); + float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + float weight = FastLanczos(base); + Upsampledcw += vec4(samplecolor * weight, weight); + float boxweight = exp(baseoffset_dot * curvebias); + rectboxmin = min(rectboxmin, samplecolor); + rectboxmax = max(rectboxmax, samplecolor); + vec3 wsample = samplecolor * boxweight; + rectboxcenter += wsample; + rectboxvar += (samplecolor * wsample); + rectboxweight += boxweight; + } + } + + rectboxweight = 1.0 / rectboxweight; + rectboxcenter *= rectboxweight; + rectboxvar *= rectboxweight; + rectboxvar = sqrt(abs(rectboxvar - rectboxcenter * rectboxcenter)); + + Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-vec3(0.075), rectboxmax+vec3(0.075)); + Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ; + + float baseupdate = 1.0f - depthfactor; + baseupdate = min(baseupdate, mix(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0))); + baseupdate = min(baseupdate, mix(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); + float basealpha = baseupdate; + + const float EPSILON = 1.192e-07f; + float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); + float boxsize = mix(scalefactor, 1.0f, boxscale); + vec3 sboxvar = rectboxvar * boxsize; + vec3 boxmin = rectboxcenter - sboxvar; + vec3 boxmax = rectboxcenter + sboxvar; + rectboxmax = min(rectboxmax, boxmax); + rectboxmin = max(rectboxmin, boxmin); + + vec3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); + float startLerpValue = params.minLerpContribution; + if ((abs(mda.x) + 
abs(mda.y)) > 0.000001) startLerpValue = 0.0; + float lerpcontribution = (any(greaterThan(rectboxmin, HistoryColor)) || any(greaterThan(HistoryColor, rectboxmax))) ? startLerpValue : 1.0f; + + HistoryColor = mix(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); + float basemin = min(basealpha, 0.1f); + basealpha = mix(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0)); + + ////blend color + float alphasum = max(EPSILON, basealpha + Upsampledcw.w); + float alpha = clamp(Upsampledcw.w / alphasum + params.reset, 0.0, 1.0); + + Upsampledcw.xyz = mix(HistoryColor, Upsampledcw.xyz, alpha); + + Output = vec4(Upsampledcw.xyz, 0.0); +} \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta new file mode 100644 index 0000000..66520fd --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: b3f52eb20bad6124e8835caaa5938444 +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs new file mode 100644 index 0000000..93382cf --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs @@ -0,0 +1,21 @@ +#version 320 es + +//============================================================================================================ +// +// +// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
+// SPDX-License-Identifier: BSD-3-Clause +// +//============================================================================================================ + +precision highp float; +precision highp int; +layout (location = 0) in vec3 vPosition; +layout (location = 1) in vec2 vTexCord; + +out vec2 texCoord; +void main() +{ + gl_Position = vec4(vPosition,1.0); + texCoord = vTexCord; +} \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta new file mode 100644 index 0000000..919e6b8 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 47316c9383c34b44c95bc0da4fad688e +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: From 87237bbddc116d921c44bbc74d176430a03e5300 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 23 Dec 2024 16:39:21 +0100 Subject: [PATCH 22/88] Initial conversion to HLSL --- .../SGSR2/Shaders/2_pass_fs/sgsr2.shader | 44 +-- .../Shaders/2_pass_fs/sgsr2_convert.hlsl | 106 +++---- .../Shaders/2_pass_fs/sgsr2_upscale.hlsl | 300 ++++++++---------- .../SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs | 21 -- .../Shaders/2_pass_fs/sgsr2_vertex.vs.meta | 7 - 5 files changed, 191 insertions(+), 287 deletions(-) delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader index f6c9e25..fee70e2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader @@ -2,7 +2,11 @@ Shader "TND/sgsr2_2pass_fs" { Properties { - _MainTex ("Texture", 2D) = "white" {} + InputColor ("Texture", 2D) = "black" {} + InputDepth ("Texture", 2D) = "gray" {} + InputVelocity ("Texture", 2D) = "black" {} + PrevOutput ("Texture", 2D) = "black" {} + // TODO: MotionDepthAlphaBuffer? Or can we pass that directly from pass 0 to pass 1? } SubShader { @@ -10,42 +14,24 @@ Shader "TND/sgsr2_2pass_fs" Pass // Convert { - CGPROGRAM + HLSLPROGRAM #pragma vertex vert_img - #pragma fragment frag + #pragma fragment sgsr2_convert + #pragma target 4.5 - #include "UnityCG.cginc" - - sampler2D _MainTex; - - fixed4 frag (v2f_img i) : SV_Target - { - fixed4 col = tex2D(_MainTex, i.uv); - // just invert the colors - col.rgb = 1 - col.rgb; - return col; - } - ENDCG + #include "sgsr2_convert.hlsl" + ENDHLSL } Pass // Upscale { - CGPROGRAM + HLSLPROGRAM #pragma vertex vert_img - #pragma fragment frag - - #include "UnityCG.cginc" - - sampler2D _MainTex; + #pragma fragment sgsr2_upscale + #pragma target 4.5 - fixed4 frag (v2f_img i) : SV_Target - { - fixed4 col = tex2D(_MainTex, i.uv); - // just invert the colors - col.rgb = 1 - col.rgb; - return col; - } - ENDCG + #include "sgsr2_upscale.hlsl" + ENDHLSL } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl index dbbfbf1..28548cc 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl @@ -9,43 +9,17 @@ // //============================================================================================================ -precision highp float; -precision highp int; +// precision highp float; +// precision highp int; -layout(location = 0) out vec4 MotionDepthClipAlphaBuffer; -layout(location = 0) in highp vec2 texCoord; +TYPED_TEXTURE2D_X(half, InputDepth); +TYPED_TEXTURE2D_X(half2, InputVelocity); -layout(set = 0, binding = 1) uniform mediump sampler2D InputDepth; -layout(set = 0, binding = 2) uniform mediump sampler2D InputVelocity; - -layout(std140, set = 0, binding = 0) uniform Params -{ - vec4 clipToPrevClip[4]; - vec2 renderSize; - vec2 outputSize; - vec2 renderSizeRcp; - vec2 outputSizeRcp; - vec2 jitterOffset; - vec2 scaleRatio; - float cameraFovAngleHor; - float minLerpContribution; - float reset; - uint bSameCamera; -} params; - -vec2 decodeVelocityFromTexture(vec2 ev) { - const float inv_div = 1.0f / (0.499f * 0.5f); - vec2 dv; - dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div; - //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f))); - return dv; -} - -void main() +void sgsr2_convert(v2f_img i, out float4 MotionDepthClipAlphaBuffer: SV_Target) { - uvec2 InputPos = uvec2(texCoord * params.renderSize); - vec2 gatherCoord = texCoord - vec2(0.5) * params.renderSizeRcp; - + const half2 texCoord = i.uv; + uint2 InputPos = uint2(texCoord * renderSize); + float2 gatherCoord = texCoord - 0.5f * renderSizeRcp; // texture gather to find nearest depth // a b c d @@ -57,60 +31,58 @@ void main() //topLeft efba //topRight ghdc - vec4 btmLeft = textureGather(InputDepth, gatherCoord, 0); - vec2 v10 = vec2(params.renderSizeRcp.x * 2.0f, 0.0); - vec4 btmRight = 
textureGather(InputDepth,(gatherCoord+v10), 0); - vec2 v12 = vec2(0.0, params.renderSizeRcp.y * 2.0f); - vec4 topLeft = textureGather(InputDepth,(gatherCoord+v12), 0); - vec2 v14 = vec2(params.renderSizeRcp.x * 2.0f, params.renderSizeRcp.y * 2.0f); - vec4 topRight = textureGather(InputDepth,(gatherCoord+v14), 0); - float maxC = min(min(min(btmLeft.z,btmRight.w),topLeft.y),topRight.x); - float btmLeft4 = min(min(min(btmLeft.y,btmLeft.x),btmLeft.z),btmLeft.w); - float btmLeftMax9 = min(topLeft.x,min(min(maxC,btmLeft4),btmRight.x)); + float4 btmLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord); + float2 v10 = float2(renderSizeRcp.x * 2.0f, 0.0); + float4 btmRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v10)); + float2 v12 = float2(0.0, renderSizeRcp.y * 2.0f); + float4 topLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v12)); + float2 v14 = float2(renderSizeRcp.x * 2.0f, renderSizeRcp.y * 2.0f); + float4 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v14)); + float maxC = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmLeft.z,btmRight.w),topLeft.y),topRight.x); + float btmLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmLeft.y,btmLeft.x),btmLeft.z),btmLeft.w); + float btmLeftMax9 = DEPTH_NEAREST(topLeft.x,DEPTH_NEAREST(DEPTH_NEAREST(maxC,btmLeft4),btmRight.x)); float depthclip = 0.0; - if (maxC < 1.0 - 1.0e-05f) + if (DEPTH_CLIP(maxC)) { - float btmRight4 = min(min(min(btmRight.y,btmRight.x),btmRight.z),btmRight.w); - float topLeft4 = min(min(min(topLeft.y,topLeft.x),topLeft.z),topLeft.w); - float topRight4 = min(min(min(topRight.y,topRight.x),topRight.z),topRight.w); + float btmRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmRight.y,btmRight.x),btmRight.z),btmRight.w); + float topLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topLeft.y,topLeft.x),topLeft.z),topLeft.w); + float topRight4 = 
DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topRight.y,topRight.x),topRight.z),topRight.w); float Wdepth = 0.0; float Ksep = 1.37e-05f; - float Kfov = params.cameraFovAngleHor; - float diagonal_length = length(params.renderSize); + float Kfov = cameraFovAngleHor; + float diagonal_length = length(renderSize); float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length; - float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC); - float EPSILON = 1.19e-07f; - Wdepth += clamp((Depthsep / (abs(maxC - btmLeft4) + EPSILON)), 0.0, 1.0); - Wdepth += clamp((Depthsep / (abs(maxC - btmRight4) + EPSILON)), 0.0, 1.0); - Wdepth += clamp((Depthsep / (abs(maxC - topLeft4) + EPSILON)), 0.0, 1.0); - Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0); + float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC); + Wdepth += clamp((Depthsep / (abs(maxC - btmLeft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - btmRight4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - topLeft4) + EPSILON)), 0.0, 1.0); + Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0); depthclip = clamp(1.0f - Wdepth * 0.25, 0.0, 1.0); } //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion - vec4 EncodedVelocity = texelFetch(InputVelocity, ivec2(InputPos), 0); + float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, int2(InputPos)); - vec2 motion; - if (EncodedVelocity.x > 0.0) + float2 motion; + if (any(abs(EncodedVelocity)) > 0.0) { motion = decodeVelocityFromTexture(EncodedVelocity.xy); } else { #ifdef REQUEST_NDC_Y_UP - vec2 ScreenPos = vec2(2.0f * texCoord.x - 1.0f, 1.0f - 2.0f * texCoord.y); + float2 ScreenPos = float2(2.0f * texCoord.x - 1.0f, 1.0f - 2.0f * texCoord.y); #else - vec2 ScreenPos = vec2(2.0f * texCoord - 1.0f); + float2 ScreenPos = float2(2.0f * texCoord - 1.0f); #endif - vec3 Position = vec3(ScreenPos, btmLeftMax9); //this_clip - vec4 PreClip = params.clipToPrevClip[3] + 
((params.clipToPrevClip[2] * Position.z) + ((params.clipToPrevClip[1] * ScreenPos.y) + (params.clipToPrevClip[0] * ScreenPos.x))); - vec2 PreScreen = PreClip.xy / PreClip.w; + float3 Position = float3(ScreenPos, btmLeftMax9); //this_clip + float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x))); + float2 PreScreen = PreClip.xy / PreClip.w; motion = Position.xy - PreScreen; } - MotionDepthClipAlphaBuffer = vec4(motion, depthclip, 0.0); - -} \ No newline at end of file + MotionDepthClipAlphaBuffer = float4(motion, depthclip, 0.0); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl index d3921a1..26303b1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -9,56 +9,31 @@ // //============================================================================================================ -precision mediump float; -precision highp int; +//precision mediump float; +//precision highp int; -float FastLanczos(float base) -{ - float y = base - 1.0f; - float y2 = y * y; - float y_temp = 0.75f * y + y2; - return y_temp * y2; -} - -layout(location = 0) out mediump vec4 Output; -layout(location = 0) in highp vec2 texCoord; - -layout(set = 0, binding = 1) uniform mediump sampler2D PrevOutput; -layout(set = 0, binding = 2) uniform mediump sampler2D MotionDepthClipAlphaBuffer; -layout(set = 0, binding = 3) uniform mediump sampler2D InputColor; +TYPED_TEXTURE2D_X(half4, PrevOutput); +TYPED_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer); +TYPED_TEXTURE2D_X(half4, InputColor); 
-layout(std140, set = 0, binding = 0) uniform readonly Params +void sgsr2_upscale(v2f_img i, out half4 Output: SV_Target) { - highp vec4 clipToPrevClip[4]; - highp vec2 renderSize; - highp vec2 outputSize; - highp vec2 renderSizeRcp; - highp vec2 outputSizeRcp; - highp vec2 jitterOffset; - highp vec2 scaleRatio; - highp float cameraFovAngleHor; - highp float minLerpContribution; - highp float reset; - uint bSameCamera; -} params; + const half2 texCoord = i.uv; + half Biasmax_viewportXScale = scaleRatio.x; + half scalefactor = scaleRatio.y; -void main() -{ - float Biasmax_viewportXScale = params.scaleRatio.x; - float scalefactor = params.scaleRatio.y; - - highp vec2 Hruv = texCoord; + float2 Hruv = texCoord; - highp vec2 Jitteruv; - Jitteruv.x = clamp(Hruv.x + (params.jitterOffset.x * params.outputSizeRcp.x), 0.0, 1.0); - Jitteruv.y = clamp(Hruv.y + (params.jitterOffset.y * params.outputSizeRcp.y), 0.0, 1.0); + float2 Jitteruv; + Jitteruv.x = clamp(Hruv.x + (jitterOffset.x * displaySizeRcp.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (jitterOffset.y * displaySizeRcp.y), 0.0, 1.0); - highp ivec2 InputPos = ivec2(Jitteruv * params.renderSize); + int2 InputPos = int2(Jitteruv * renderSize); - highp vec3 mda = textureLod(MotionDepthClipAlphaBuffer, Jitteruv, 0.0).xyz; - highp vec2 Motion = mda.xy; + float3 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0.0).xyz; + float2 Motion = mda.xy; - highp vec2 PrevUV; + float2 PrevUV; PrevUV.x = clamp(-0.5 * Motion.x + Hruv.x, 0.0, 1.0); #ifdef REQUEST_NDC_Y_UP PrevUV.y = clamp(0.5 * Motion.y + Hruv.y, 0.0, 1.0); @@ -66,110 +41,110 @@ void main() PrevUV.y = clamp(-0.5 * Motion.y + Hruv.y, 0.0, 1.0); #endif - float depthfactor = mda.z; + half depthfactor = mda.z; - vec3 HistoryColor = textureLod(PrevOutput, PrevUV, 0.0).xyz; + half3 HistoryColor = SAMPLE_TEXTURE2D_X_LOD(PrevOutput, S_LINEAR_CLAMP, PrevUV, 0.0).xyz; /////upsample and compute box - vec4 Upsampledcw = vec4(0.0); - float biasmax = 
Biasmax_viewportXScale ; - float biasmin = max(1.0f, 0.3 + 0.3 * biasmax); - float biasfactor = 0.25f * depthfactor; - float kernelbias = mix(biasmax, biasmin, biasfactor); - float motion_viewport_len = length(Motion * params.outputSize); - float curvebias = mix(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0)); - - vec3 rectboxcenter = vec3(0.0); - vec3 rectboxvar = vec3(0.0); - float rectboxweight = 0.0; - highp vec2 srcpos = vec2(InputPos) + vec2(0.5) - params.jitterOffset; + half4 Upsampledcw = 0.0f; + half biasmax = Biasmax_viewportXScale ; + half biasmin = max(1.0f, 0.3 + 0.3 * biasmax); + half biasfactor = 0.25f * depthfactor; + half kernelbias = lerp(biasmax, biasmin, biasfactor); + half motion_viewport_len = length(Motion * displaySize); + half curvebias = lerp(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0)); + + half3 rectboxcenter = 0.0f; + half3 rectboxvar = 0.0f; + half rectboxweight = 0.0; + float2 srcpos = half2(InputPos) + 0.5f - jitterOffset; kernelbias *= 0.5f; - float kernelbias2 = kernelbias * kernelbias; - vec2 srcpos_srcOutputPos = srcpos - Hruv * params.renderSize; //srcOutputPos = Hruv * params.renderSize; - vec3 rectboxmin; - vec3 rectboxmax; - vec3 topMid = texelFetch(InputColor, InputPos + ivec2(0, 1), 0).xyz; + half kernelbias2 = kernelbias * kernelbias; + half2 srcpos_srcOutputPos = srcpos - Hruv * renderSize; //srcOutputPos = Hruv * params.renderSize; + half3 rectboxmin; + half3 rectboxmax; + half3 topMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, 1)).xyz; { - vec3 samplecolor = topMid; - vec2 baseoffset = srcpos_srcOutputPos + vec2(0.0, 1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 samplecolor = topMid; + half2 baseoffset = srcpos_srcOutputPos + half2(0.0, 1.0); + half baseoffset_dot = 
dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = samplecolor; rectboxmax = samplecolor; - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } - vec3 rightMid = texelFetch(InputColor, InputPos + ivec2(1, 0), 0).xyz; + half3 rightMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, 0)).xyz; { - vec3 samplecolor = rightMid; - vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, 0.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 samplecolor = rightMid; + half2 baseoffset = srcpos_srcOutputPos + half2(1.0, 0.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } - vec3 leftMid = texelFetch(InputColor, InputPos + ivec2(-1, 0) , 0).xyz; + half3 leftMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, 0)).xyz; { - vec3 samplecolor = leftMid; - vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, 0.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += 
vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 samplecolor = leftMid; + half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, 0.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } - vec3 centerMid = texelFetch(InputColor, InputPos + ivec2(0, 0) , 0).xyz; + half3 centerMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, 0)).xyz; { - vec3 samplecolor = centerMid; - vec2 baseoffset = srcpos_srcOutputPos; - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 samplecolor = centerMid; + half2 baseoffset = srcpos_srcOutputPos; + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } - vec3 btmMid = texelFetch(InputColor, InputPos + ivec2(0, -1) , 0).xyz; + half3 btmMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, -1)).xyz; { - vec3 samplecolor = btmMid; - vec2 baseoffset = 
srcpos_srcOutputPos + vec2(0.0, -1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 samplecolor = btmMid; + half2 baseoffset = srcpos_srcOutputPos + half2(0.0, -1.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; @@ -179,66 +154,66 @@ void main() if (false) //maybe disable this for ultra performance, true could generate more realistic output { { - vec3 topRight = texelFetch(InputColor, InputPos + ivec2(1, 1), 0).xyz; - vec3 samplecolor = topRight; - vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, 1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 topRight = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, 1)).xyz; + half3 samplecolor = topRight; + half2 baseoffset = srcpos_srcOutputPos + half2(1.0, 1.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = 
samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } { - vec3 topLeft = texelFetch(InputColor, InputPos + ivec2(-1, 1), 0).xyz; - vec3 samplecolor = topLeft; - vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, 1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 topLeft = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, 1)).xyz; + half3 samplecolor = topLeft; + half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, 1.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } { - vec3 btmRight = texelFetch(InputColor, InputPos + ivec2(1, -1) , 0).xyz; - vec3 samplecolor = btmRight; - vec2 baseoffset = srcpos_srcOutputPos + vec2(1.0, -1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 btmRight = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, -1)).xyz; + half3 samplecolor = btmRight; + half2 baseoffset = srcpos_srcOutputPos + half2(1.0, -1.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half 
weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; } { - vec3 btmLeft = texelFetch(InputColor, InputPos + ivec2(-1, -1) , 0).xyz; - vec3 samplecolor = btmLeft; - vec2 baseoffset = srcpos_srcOutputPos + vec2(-1.0, -1.0); - float baseoffset_dot = dot(baseoffset, baseoffset); - float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); - float weight = FastLanczos(base); - Upsampledcw += vec4(samplecolor * weight, weight); - float boxweight = exp(baseoffset_dot * curvebias); + half3 btmLeft = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, -1)).xyz; + half3 samplecolor = btmLeft; + half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, -1.0); + half baseoffset_dot = dot(baseoffset, baseoffset); + half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f); + half weight = FastLanczos(base); + Upsampledcw += half4(samplecolor * weight, weight); + half boxweight = exp(baseoffset_dot * curvebias); rectboxmin = min(rectboxmin, samplecolor); rectboxmax = max(rectboxmax, samplecolor); - vec3 wsample = samplecolor * boxweight; + half3 wsample = samplecolor * boxweight; rectboxcenter += wsample; rectboxvar += (samplecolor * wsample); rectboxweight += boxweight; @@ -250,37 +225,36 @@ void main() rectboxvar *= rectboxweight; rectboxvar = sqrt(abs(rectboxvar - rectboxcenter * rectboxcenter)); - Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-vec3(0.075), rectboxmax+vec3(0.075)); + Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.075f, rectboxmax+0.075f); Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ; - float baseupdate = 1.0f - depthfactor; - baseupdate = min(baseupdate, mix(baseupdate, 
Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0))); - baseupdate = min(baseupdate, mix(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); - float basealpha = baseupdate; + half baseupdate = 1.0f - depthfactor; + baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0))); + baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0))); + half basealpha = baseupdate; - const float EPSILON = 1.192e-07f; - float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); - float boxsize = mix(scalefactor, 1.0f, boxscale); - vec3 sboxvar = rectboxvar * boxsize; - vec3 boxmin = rectboxcenter - sboxvar; - vec3 boxmax = rectboxcenter + sboxvar; + half boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0)); + half boxsize = lerp(scalefactor, 1.0f, boxscale); + half3 sboxvar = rectboxvar * boxsize; + half3 boxmin = rectboxcenter - sboxvar; + half3 boxmax = rectboxcenter + sboxvar; rectboxmax = min(rectboxmax, boxmax); rectboxmin = max(rectboxmin, boxmin); - vec3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); - float startLerpValue = params.minLerpContribution; + half3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax); + half startLerpValue = minLerpContribution; if ((abs(mda.x) + abs(mda.y)) > 0.000001) startLerpValue = 0.0; - float lerpcontribution = (any(greaterThan(rectboxmin, HistoryColor)) || any(greaterThan(HistoryColor, rectboxmax))) ? startLerpValue : 1.0f; + half lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? 
startLerpValue : 1.0f; - HistoryColor = mix(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); - float basemin = min(basealpha, 0.1f); - basealpha = mix(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0)); + HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0)); + half basemin = min(basealpha, 0.1f); + basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0)); ////blend color - float alphasum = max(EPSILON, basealpha + Upsampledcw.w); - float alpha = clamp(Upsampledcw.w / alphasum + params.reset, 0.0, 1.0); + half alphasum = max(EPSILON, basealpha + Upsampledcw.w); + half alpha = clamp(Upsampledcw.w / alphasum + reset, 0.0, 1.0); - Upsampledcw.xyz = mix(HistoryColor, Upsampledcw.xyz, alpha); + Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); - Output = vec4(Upsampledcw.xyz, 0.0); -} \ No newline at end of file + Output = half4(Upsampledcw.xyz, 0.0); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs deleted file mode 100644 index 93382cf..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs +++ /dev/null @@ -1,21 +0,0 @@ -#version 320 es - -//============================================================================================================ -// -// -// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. 
-// SPDX-License-Identifier: BSD-3-Clause -// -//============================================================================================================ - -precision highp float; -precision highp int; -layout (location = 0) in vec3 vPosition; -layout (location = 1) in vec2 vTexCord; - -out vec2 texCoord; -void main() -{ - gl_Position = vec4(vPosition,1.0); - texCoord = vTexCord; -} \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta deleted file mode 100644 index 919e6b8..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_vertex.vs.meta +++ /dev/null @@ -1,7 +0,0 @@ -fileFormatVersion: 2 -guid: 47316c9383c34b44c95bc0da4fad688e -DefaultImporter: - externalObjects: {} - userData: - assetBundleName: - assetBundleVariant: From cac783bebc3ca6275e167bbce4bece0ac1d4f2c9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 24 Dec 2024 16:44:03 +0100 Subject: [PATCH 23/88] Working SGSR2 2-pass FS implementation, bit dirty but good enough for testing. 
--- .../PostProcessing/PostProcessResources.asset | 2 + .../Runtime/Effects/Upscaling.cs | 2 + .../Runtime/Effects/Upscaling/SGSR2/SGSR2.cs | 6 +++ .../SGSR2/Shaders/2_pass_fs/sgsr2.shader | 14 +++---- .../Shaders/2_pass_fs/sgsr2_convert.hlsl | 7 +++- .../Shaders/2_pass_fs/sgsr2_upscale.hlsl | 5 +++ .../Upscaling/SGSR2/Shaders/sgsr2_common.hlsl | 2 +- .../Upscaling/SGSR2Upscaler_2PassCS.cs | 4 +- .../Upscaling/SGSR2Upscaler_2PassFS.cs | 39 +++++++++++++++++++ .../Upscaling/SGSR2Upscaler_2PassFS.cs.meta | 3 ++ .../Upscaling/SGSR2Upscaler_3PassCS.cs | 6 +-- .../Runtime/PostProcessResources.cs | 5 +++ 12 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset index 5c81a3d..6041f70 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset @@ -114,6 +114,8 @@ MonoBehaviour: scalableAO: {fileID: 4800000, guid: d7640629310e79646af0f46eb55ae466, type: 3} multiScaleAO: {fileID: 4800000, guid: 67f9497810829eb4791ec19e95781e51, type: 3} screenSpaceReflections: {fileID: 4800000, guid: f997a3dc9254c44459323cced085150c, type: 3} + sgsr2Upscaler: + twoPassFragment: {fileID: 4800000, guid: 9e367486dadedbc4da8313a481aa8a27, type: 3} computeShaders: autoExposure: {fileID: 7200000, guid: 34845e0ca016b7448842e965db5890a5, type: 3} exposureHistogram: {fileID: 7200000, guid: 8c2fcbdf9bc58664f89917f7b9d79501, type: 3} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index 9848437..49fc4f0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -16,6 +16,7 @@ namespace UnityEngine.Rendering.PostProcessing [InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2, [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3, //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR, + [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Fragment")] SGSR2_2PassFS, [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Compute")] SGSR2_2PassCS, [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 3-Pass Compute")] SGSR2_3PassCS, [InspectorName("PlayStation Spectral Super Resolution (PSSR)")] PSSR, @@ -181,6 +182,7 @@ namespace UnityEngine.Rendering.PostProcessing { UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(), UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(), + UpscalerType.SGSR2_2PassFS => new SGSR2Upscaler_2PassFS(), UpscalerType.SGSR2_2PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_2PassCS(), UpscalerType.SGSR2_3PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_3PassCS(), _ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs index 8100bac..cf8a280 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs @@ -29,6 +29,12 @@ public static class SGSR2 public uint reset; } + [Serializable] + public class 
Shaders + { + public Shader twoPassFragment; + } + [Serializable] public class ComputeShaders { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader index fee70e2..84b89ab 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader @@ -1,23 +1,18 @@ Shader "TND/sgsr2_2pass_fs" { - Properties - { - InputColor ("Texture", 2D) = "black" {} - InputDepth ("Texture", 2D) = "gray" {} - InputVelocity ("Texture", 2D) = "black" {} - PrevOutput ("Texture", 2D) = "black" {} - // TODO: MotionDepthAlphaBuffer? Or can we pass that directly from pass 0 to pass 1? - } SubShader { Cull Off ZWrite Off ZTest Always Pass // Convert { + Name "Convert" + HLSLPROGRAM #pragma vertex vert_img #pragma fragment sgsr2_convert #pragma target 4.5 + #pragma enable_d3d11_debug_symbols #include "sgsr2_convert.hlsl" ENDHLSL @@ -25,10 +20,13 @@ Shader "TND/sgsr2_2pass_fs" Pass // Upscale { + Name "Upscale" + HLSLPROGRAM #pragma vertex vert_img #pragma fragment sgsr2_upscale #pragma target 4.5 + #pragma enable_d3d11_debug_symbols #include "sgsr2_upscale.hlsl" ENDHLSL diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl index 28548cc..abbf9e8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl @@ 
-12,8 +12,11 @@ // precision highp float; // precision highp int; -TYPED_TEXTURE2D_X(half, InputDepth); -TYPED_TEXTURE2D_X(half2, InputVelocity); +// TODO: should use the SAMPLE_DEPTH_TEXTURE macros here? +TYPED_TEXTURE2D_X(half, _CameraDepthTexture); +TYPED_TEXTURE2D_X(half2, _CameraMotionVectorsTexture); +#define InputDepth _CameraDepthTexture +#define InputVelocity _CameraMotionVectorsTexture void sgsr2_convert(v2f_img i, out float4 MotionDepthClipAlphaBuffer: SV_Target) { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl index 26303b1..06da4d7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -16,6 +16,11 @@ TYPED_TEXTURE2D_X(half4, PrevOutput); TYPED_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer); TYPED_TEXTURE2D_X(half4, InputColor); +// TODO: figure out the appropriate way of handling these SamplerStates in such a way that it all works with SRPs as well +SamplerState samplerPrevOutput; +SamplerState samplerMotionDepthClipAlphaBuffer; +SamplerState samplerInputColor; + void sgsr2_upscale(v2f_img i, out half4 Output: SV_Target) { const half2 texCoord = i.uv; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl index befb867..2fb1bf4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl @@ -12,7 +12,7 @@ #define DEPTH_CLIP(depth) ((depth) < 1.0f - 1.0e-05f) #endif -cbuffer Params : register(b0) +cbuffer cbSGSR2 : register(b0) { uint2 renderSize; uint2 displaySize; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs index 7e4dfbe..561a001 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs @@ -17,7 +17,7 @@ namespace UnityEngine.Rendering.PostProcessing var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.convert; int kernelIndex = shader.FindKernel("CS"); - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source); cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth); cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors); @@ -36,7 +36,7 @@ namespace UnityEngine.Rendering.PostProcessing int kernelIndex = shader.FindKernel("CS"); uint frameIndex = _frameCount % 2; - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]); cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", 
_motionDepthClipAlpha); cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs new file mode 100644 index 0000000..2f67b19 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs @@ -0,0 +1,39 @@ +using System.Runtime.InteropServices; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal class SGSR2Upscaler_2PassFS: SGSR2Upscaler + { + protected override string VariantName => "SGSR2 2-Pass Fragment"; + + private Material _material; + + public override void CreateContext(PostProcessRenderContext context, Upscaling config) + { + base.CreateContext(context, config); + + _material = new Material(context.resources.shaders.sgsr2Upscaler.twoPassFragment); + } + + public override void DestroyContext() + { + RuntimeUtilities.Destroy(_material); + + base.DestroyContext(); + } + + protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config) + { + uint frameIndex = _frameCount % 2; + + cmd.SetGlobalTexture("InputColor", context.source); + cmd.SetGlobalTexture("MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); + cmd.SetGlobalTexture("PrevOutput", _upscaleHistory[frameIndex ^ 1]); + cmd.SetGlobalConstantBuffer(_paramsBuffer, "cbSGSR2", 0, Marshal.SizeOf()); + + cmd.Blit(BuiltinRenderTextureType.None, _motionDepthClipAlpha, _material, 0); + cmd.Blit(BuiltinRenderTextureType.None, _upscaleHistory[frameIndex], _material, 1); + cmd.Blit(_upscaleHistory[frameIndex], context.destination); + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta new file mode 100644 index 0000000..b1bf38c --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 50ebaec17d8940c0ac51a8721f9f9419 +timeCreated: 1734977118 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs index a32e2e3..6be8110 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs @@ -18,7 +18,7 @@ namespace UnityEngine.Rendering.PostProcessing var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.convert; int kernelIndex = shader.FindKernel("CS"); - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); cmd.SetComputeTextureParam(shader, kernelIndex, "InputOpaqueColor", config.ColorOpaqueOnly); cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source); cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth); @@ -38,7 +38,7 @@ namespace UnityEngine.Rendering.PostProcessing int kernelIndex = shader.FindKernel("CS"); uint frameIndex = _frameCount % 2; - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); cmd.SetComputeTextureParam(shader, kernelIndex, "PrevLumaHistory", 
_lumaHistory[frameIndex ^ 1]); cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha); cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); @@ -57,7 +57,7 @@ namespace UnityEngine.Rendering.PostProcessing int kernelIndex = shader.FindKernel("CS"); uint frameIndex = _frameCount % 2; - cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]); cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs index ce92f00..8d62f44 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs @@ -140,6 +140,11 @@ namespace UnityEngine.Rendering.PostProcessing /// public Shader screenSpaceReflections; + /// + /// The shaders used by the SnapDragon Game Super Resolution 2 (SGSR2) Upscaler. + /// + public SGSR2.Shaders sgsr2Upscaler; + /// /// Returns a copy of this class and its content. /// From 72df3473155066e5c2fc6f125566e0876ffb5112 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 25 Dec 2024 15:40:55 +0100 Subject: [PATCH 24/88] Reworked 2-pass FS implementation to integrate more directly with the PPV2 framework, allowing for multi-target rendering using the fullscreen triangle blit, which eliminates the need for an extra blit at the end. Also fixed a bug where the scale factor was inverted, which caused FS upscaling output to be blurry. 
--- .../SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl | 4 ++-- .../2_pass_fs/{sgsr2.shader => sgsr2_ppv2.shader} | 10 +++++++--- .../{sgsr2.shader.meta => sgsr2_ppv2.shader.meta} | 2 +- .../SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl | 5 +++-- .../Runtime/Effects/Upscaling/SGSR2Upscaler.cs | 2 +- .../Effects/Upscaling/SGSR2Upscaler_2PassFS.cs | 12 +++++++++--- 6 files changed, 23 insertions(+), 12 deletions(-) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/{sgsr2.shader => sgsr2_ppv2.shader} (64%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/{sgsr2.shader.meta => sgsr2_ppv2.shader.meta} (80%) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl index abbf9e8..e3367ea 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl @@ -18,9 +18,9 @@ TYPED_TEXTURE2D_X(half2, _CameraMotionVectorsTexture); #define InputDepth _CameraDepthTexture #define InputVelocity _CameraMotionVectorsTexture -void sgsr2_convert(v2f_img i, out float4 MotionDepthClipAlphaBuffer: SV_Target) +void sgsr2_convert(VaryingsDefault i, out float4 MotionDepthClipAlphaBuffer: SV_Target) { - const half2 texCoord = i.uv; + const half2 texCoord = i.texcoord; uint2 InputPos = uint2(texCoord * renderSize); float2 gatherCoord = texCoord - 0.5f * renderSizeRcp; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader similarity index 64% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader index 84b89ab..7e7f176 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader @@ -1,4 +1,4 @@ -Shader "TND/sgsr2_2pass_fs" +Shader "TND/PPV2/sgsr2_2pass_fs" { SubShader { @@ -9,11 +9,13 @@ Shader "TND/sgsr2_2pass_fs" Name "Convert" HLSLPROGRAM - #pragma vertex vert_img + #pragma vertex VertDefault #pragma fragment sgsr2_convert #pragma target 4.5 #pragma enable_d3d11_debug_symbols + #define UNITY_CG_INCLUDED + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl" #include "sgsr2_convert.hlsl" ENDHLSL } @@ -23,11 +25,13 @@ Shader "TND/sgsr2_2pass_fs" Name "Upscale" HLSLPROGRAM - #pragma vertex vert_img + #pragma vertex VertDefault #pragma fragment sgsr2_upscale #pragma target 4.5 #pragma enable_d3d11_debug_symbols + #define UNITY_CG_INCLUDED + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl" #include "sgsr2_upscale.hlsl" ENDHLSL } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta similarity index 80% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta rename to 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta index 435c493..bad8ad4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2.shader.meta +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 9e367486dadedbc4da8313a481aa8a27 +guid: 59bc1035dd975f64d8141148a7088d0a ShaderImporter: externalObjects: {} defaultTextures: [] diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl index 06da4d7..7f8183d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -21,9 +21,9 @@ SamplerState samplerPrevOutput; SamplerState samplerMotionDepthClipAlphaBuffer; SamplerState samplerInputColor; -void sgsr2_upscale(v2f_img i, out half4 Output: SV_Target) +void sgsr2_upscale(VaryingsDefault i, out half4 Output: SV_Target0, out half4 HistoryOutput: SV_Target1) { - const half2 texCoord = i.uv; + const half2 texCoord = i.texcoord; half Biasmax_viewportXScale = scaleRatio.x; half scalefactor = scaleRatio.y; @@ -262,4 +262,5 @@ void sgsr2_upscale(v2f_img i, out half4 Output: SV_Target) Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); Output = half4(Upsampledcw.xyz, 0.0); + HistoryOutput = Output; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs index c386d59..6c9ef84 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs @@ -70,7 +70,7 @@ namespace UnityEngine.Rendering.PostProcessing parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * parms.renderSize.x * parms.renderSizeRcp.y; parms.cameraNear = context.camera.nearClipPlane; parms.minLerpContribution = 0f; - parms.scaleRatio = new Vector2(parms.renderSize.x * parms.displaySizeRcp.x, parms.renderSize.y * parms.displaySizeRcp.y); + parms.scaleRatio = new Vector2(parms.displaySize.x * parms.renderSizeRcp.x, parms.displaySize.y * parms.renderSizeRcp.y); parms.bSameCamera = isCameraStill ? 1u : 0u; parms.reset = config.Reset ? 1u : 0u; _paramsBuffer.UpdateBufferData(cmd); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs index 2f67b19..df39c3f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs @@ -7,6 +7,7 @@ namespace UnityEngine.Rendering.PostProcessing protected override string VariantName => "SGSR2 2-Pass Fragment"; private Material _material; + private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2]; public override void CreateContext(PostProcessRenderContext context, Upscaling config) { @@ -26,14 +27,19 @@ namespace UnityEngine.Rendering.PostProcessing { uint frameIndex = _frameCount % 2; + // TODO: try using a PropertySheet here again, now that PPV2 is functional 
cmd.SetGlobalTexture("InputColor", context.source); cmd.SetGlobalTexture("MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); cmd.SetGlobalTexture("PrevOutput", _upscaleHistory[frameIndex ^ 1]); cmd.SetGlobalConstantBuffer(_paramsBuffer, "cbSGSR2", 0, Marshal.SizeOf()); + + cmd.SetRenderTarget(_motionDepthClipAlpha, context.source); + cmd.DrawMesh(RuntimeUtilities.fullscreenTriangle, Matrix4x4.identity, _material, 0, 0); - cmd.Blit(BuiltinRenderTextureType.None, _motionDepthClipAlpha, _material, 0); - cmd.Blit(BuiltinRenderTextureType.None, _upscaleHistory[frameIndex], _material, 1); - cmd.Blit(_upscaleHistory[frameIndex], context.destination); + _mrt[0] = context.destination; + _mrt[1] = _upscaleHistory[frameIndex]; + cmd.SetRenderTarget(_mrt, context.destination); + cmd.DrawMesh(RuntimeUtilities.fullscreenTriangle, Matrix4x4.identity, _material, 0, 1); } } } From df58df61d6a7e05577d93e2702f2c1167fa8e683 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 28 Dec 2024 14:29:34 +0100 Subject: [PATCH 25/88] Reorganized shader code such that standard BiRP and PPV2 can share the same common code with only some light wrappers to implement the differences --- .../PostProcessing/PostProcessResources.asset | 2 +- .../SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader | 49 +++++++++++++++++++ .../Shaders/2_pass_fs/sgsr2_birp.shader.meta | 3 ++ .../Shaders/2_pass_fs/sgsr2_convert.hlsl | 4 +- .../SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader | 25 ++++++++-- .../Shaders/2_pass_fs/sgsr2_upscale.hlsl | 5 +- 6 files changed, 76 insertions(+), 12 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset index 6041f70..954f3a9 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset @@ -115,7 +115,7 @@ MonoBehaviour: multiScaleAO: {fileID: 4800000, guid: 67f9497810829eb4791ec19e95781e51, type: 3} screenSpaceReflections: {fileID: 4800000, guid: f997a3dc9254c44459323cced085150c, type: 3} sgsr2Upscaler: - twoPassFragment: {fileID: 4800000, guid: 9e367486dadedbc4da8313a481aa8a27, type: 3} + twoPassFragment: {fileID: 4800000, guid: 59bc1035dd975f64d8141148a7088d0a, type: 3} computeShaders: autoExposure: {fileID: 7200000, guid: 34845e0ca016b7448842e965db5890a5, type: 3} exposureHistogram: {fileID: 7200000, guid: 8c2fcbdf9bc58664f89917f7b9d79501, type: 3} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader new file mode 100644 index 0000000..7202ee0 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader @@ -0,0 +1,49 @@ +Shader "TND/sgsr2_2pass_fs" +{ + SubShader + { + Cull Off ZWrite Off ZTest Always + + Pass + { + Name "Convert" + + HLSLPROGRAM + #pragma vertex vert_img + #pragma fragment frag_convert + #pragma target 4.5 + #pragma enable_d3d11_debug_symbols + + #include "../sgsr2_birp.hlsl" + #include "sgsr2_convert.hlsl" + + void frag_convert(v2f_img i, out float4 MotionDepthClipAlphaBuffer: SV_Target) + { + sgsr2_convert(i.uv, MotionDepthClipAlphaBuffer); + } + + ENDHLSL + } + + Pass + { + Name "Upscale" + + HLSLPROGRAM + #pragma vertex vert_img + #pragma fragment frag_upscale + #pragma target 4.5 + #pragma enable_d3d11_debug_symbols + + #include "../sgsr2_birp.hlsl" + 
#include "sgsr2_upscale.hlsl" + + void frag_upscale(v2f_img i, out half4 OutputColor: SV_Target) + { + sgsr2_upscale(i.uv, OutputColor); + } + + ENDHLSL + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta new file mode 100644 index 0000000..6098da7 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 4451c8b25af942ccaa8d6d0bb46a8e60 +timeCreated: 1735392260 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl index e3367ea..a4b9e7b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl @@ -1,4 +1,3 @@ -#include "../sgsr2_birp.hlsl" #include "../sgsr2_common.hlsl" //============================================================================================================ @@ -18,9 +17,8 @@ TYPED_TEXTURE2D_X(half2, _CameraMotionVectorsTexture); #define InputDepth _CameraDepthTexture #define InputVelocity _CameraMotionVectorsTexture -void sgsr2_convert(VaryingsDefault i, out float4 MotionDepthClipAlphaBuffer: SV_Target) +void sgsr2_convert(const half2 texCoord, out float4 MotionDepthClipAlphaBuffer) { - const half2 texCoord = i.texcoord; uint2 InputPos = uint2(texCoord * renderSize); float2 gatherCoord = texCoord - 0.5f * renderSizeRcp; diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader index 7e7f176..bf4ecc7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader @@ -4,35 +4,52 @@ Shader "TND/PPV2/sgsr2_2pass_fs" { Cull Off ZWrite Off ZTest Always - Pass // Convert + Pass { Name "Convert" HLSLPROGRAM #pragma vertex VertDefault - #pragma fragment sgsr2_convert + #pragma fragment FragConvert #pragma target 4.5 #pragma enable_d3d11_debug_symbols #define UNITY_CG_INCLUDED #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl" + #undef EPSILON + #include "../sgsr2_birp.hlsl" #include "sgsr2_convert.hlsl" + + void FragConvert(VaryingsDefault i, out float4 MotionDepthClipAlphaBuffer: SV_Target) + { + sgsr2_convert(i.texcoord, MotionDepthClipAlphaBuffer); + } + ENDHLSL } - Pass // Upscale + Pass { Name "Upscale" HLSLPROGRAM #pragma vertex VertDefault - #pragma fragment sgsr2_upscale + #pragma fragment FragUpscale #pragma target 4.5 #pragma enable_d3d11_debug_symbols #define UNITY_CG_INCLUDED #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl" + #undef EPSILON + #include "../sgsr2_birp.hlsl" #include "sgsr2_upscale.hlsl" + + void FragUpscale(VaryingsDefault i, out half4 OutputColor: SV_Target0, out half4 HistoryOutput: SV_Target1) + { + sgsr2_upscale(i.texcoord, OutputColor); + HistoryOutput = OutputColor; + } + ENDHLSL } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl index 7f8183d..62904ac 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -1,4 +1,3 @@ -#include "../sgsr2_birp.hlsl" #include "../sgsr2_common.hlsl" //============================================================================================================ @@ -21,9 +20,8 @@ SamplerState samplerPrevOutput; SamplerState samplerMotionDepthClipAlphaBuffer; SamplerState samplerInputColor; -void sgsr2_upscale(VaryingsDefault i, out half4 Output: SV_Target0, out half4 HistoryOutput: SV_Target1) +void sgsr2_upscale(const half2 texCoord, out half4 Output) { - const half2 texCoord = i.texcoord; half Biasmax_viewportXScale = scaleRatio.x; half scalefactor = scaleRatio.y; @@ -262,5 +260,4 @@ void sgsr2_upscale(VaryingsDefault i, out half4 Output: SV_Target0, out half4 Hi Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha); Output = half4(Upsampledcw.xyz, 0.0); - HistoryOutput = Output; } From 9f4b96ea2589d8867c5bfed80cb5a402ceaece89 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 28 Dec 2024 14:46:20 +0100 Subject: [PATCH 26/88] Made integration more PPV2-like by using property sheets, material property blocks and the BlitFullScreenTriangle method. 
--- .../Upscaling/SGSR2Upscaler_2PassFS.cs | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs index df39c3f..1fca768 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs @@ -6,19 +6,19 @@ namespace UnityEngine.Rendering.PostProcessing { protected override string VariantName => "SGSR2 2-Pass Fragment"; - private Material _material; + private PropertySheet _sheet; private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2]; public override void CreateContext(PostProcessRenderContext context, Upscaling config) { base.CreateContext(context, config); - _material = new Material(context.resources.shaders.sgsr2Upscaler.twoPassFragment); + _sheet = new PropertySheet(new Material(context.resources.shaders.sgsr2Upscaler.twoPassFragment)); } public override void DestroyContext() { - RuntimeUtilities.Destroy(_material); + _sheet.Release(); base.DestroyContext(); } @@ -27,19 +27,16 @@ namespace UnityEngine.Rendering.PostProcessing { uint frameIndex = _frameCount % 2; - // TODO: try using a PropertySheet here again, now that PPV2 is functional cmd.SetGlobalTexture("InputColor", context.source); - cmd.SetGlobalTexture("MotionDepthClipAlphaBuffer", _motionDepthClipAlpha); - cmd.SetGlobalTexture("PrevOutput", _upscaleHistory[frameIndex ^ 1]); - cmd.SetGlobalConstantBuffer(_paramsBuffer, "cbSGSR2", 0, Marshal.SizeOf()); - - cmd.SetRenderTarget(_motionDepthClipAlpha, context.source); - cmd.DrawMesh(RuntimeUtilities.fullscreenTriangle, Matrix4x4.identity, _material, 0, 0); + _sheet.properties.SetTexture("MotionDepthClipAlphaBuffer", 
_motionDepthClipAlpha); + _sheet.properties.SetTexture("PrevOutput", _upscaleHistory[frameIndex ^ 1]); + _sheet.properties.SetConstantBuffer("cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf()); + cmd.BlitFullscreenTriangle(BuiltinRenderTextureType.None, _motionDepthClipAlpha, _sheet, 0); + _mrt[0] = context.destination; _mrt[1] = _upscaleHistory[frameIndex]; - cmd.SetRenderTarget(_mrt, context.destination); - cmd.DrawMesh(RuntimeUtilities.fullscreenTriangle, Matrix4x4.identity, _material, 0, 1); + cmd.BlitFullscreenTriangle(BuiltinRenderTextureType.None, _mrt, BuiltinRenderTextureType.None, _sheet, 1); } } } From 9971d85ce4234a0f5825f3bdfd5db634dfe6a55d Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 20 Jan 2025 18:44:38 +0100 Subject: [PATCH 27/88] Changed output texture formats from half to float, fixes Metal shader compilation issues on iOS --- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute | 2 +- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute | 4 ++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute | 4 ++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute | 4 ++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute index 5d4711e..5bfb16e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute @@ -13,7 +13,7 @@ TEXTURE2D_X(InputColor) : register(t0); TYPED_TEXTURE2D_X(float, InputDepth) : register(t1); TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2); 
-RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); [numthreads(8, 8, 1)] diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index 11d11c2..62c689d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -14,8 +14,8 @@ TEXTURE2D_X(PrevHistoryOutput) : register(t0); TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); -RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); +RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute index 7527649..d925608 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute @@ -11,9 +11,9 @@ //============================================================================================================ TYPED_TEXTURE2D_X(uint, PrevLumaHistory) : 
register(t0); -TYPED_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(t1); +TEXTURE2D_X(MotionDepthAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, LumaHistory) : register(u1); [numthreads(8, 8, 1)] diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute index e049225..e1cd580 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute @@ -14,7 +14,7 @@ TEXTURE2D_X(InputOpaqueColor) : register(t0); TEXTURE2D_X(InputColor) : register(t1); TYPED_TEXTURE2D_X(float, InputDepth) : register(t2); TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); -RW_TEXTURE2D_X(half4, MotionDepthAlphaBuffer) : register(u0); +RW_TEXTURE2D_X(float4, MotionDepthAlphaBuffer) : register(u0); RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); [numthreads(8, 8, 1)] @@ -100,5 +100,5 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) alpha_mask = (0.35f * 1000.0f) * alpha_mask; YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; - MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = half4(motion, NearestZ, alpha_mask); + MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, NearestZ, alpha_mask); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index d96307d..727d358 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -14,8 +14,8 @@ TEXTURE2D_X(PrevHistoryOutput) : register(t0); TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); -RW_TEXTURE2D_X(half4, SceneColorOutput) : register(u0); -RW_TEXTURE2D_X(half4, HistoryOutput) : register(u1); +RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); +RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); [numthreads(8, 8, 1)] void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) From 299200cb9c24638017e85ca9714cc2f5a6527749 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 20 Jan 2025 19:12:45 +0100 Subject: [PATCH 28/88] Ported over jitter UV fixes from Snapdragon GitHub repo --- .../Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute | 4 ++-- .../Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl | 4 ++-- .../Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute index 62c689d..53be6e6 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute @@ -31,8 +31,8 @@ void CS(uint3 gl_GlobalInvocationID : 
SV_DispatchThreadID) float2 InputInfoViewportSize = float2(renderSize); float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; float2 Jitteruv; - Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); - Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); + Jitteruv.x = clamp(Hruv.x + (InputJitter.x * renderSizeRcp.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (InputJitter.y * renderSizeRcp.y), 0.0, 1.0); int2 InputPos = int2(Jitteruv * InputInfoViewportSize); float4 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0).xyzw; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl index 62904ac..9466d98 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl @@ -28,8 +28,8 @@ void sgsr2_upscale(const half2 texCoord, out half4 Output) float2 Hruv = texCoord; float2 Jitteruv; - Jitteruv.x = clamp(Hruv.x + (jitterOffset.x * displaySizeRcp.x), 0.0, 1.0); - Jitteruv.y = clamp(Hruv.y + (jitterOffset.y * displaySizeRcp.y), 0.0, 1.0); + Jitteruv.x = clamp(Hruv.x + (jitterOffset.x * renderSizeRcp.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (jitterOffset.y * renderSizeRcp.y), 0.0, 1.0); int2 InputPos = int2(Jitteruv * renderSize); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute index 727d358..59b24e5 100644 
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute @@ -31,8 +31,8 @@ void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) float2 InputInfoViewportSize = float2(renderSize); float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; float2 Jitteruv; - Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0); - Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0); + Jitteruv.x = clamp(Hruv.x + (InputJitter.x * renderSizeRcp.x), 0.0, 1.0); + Jitteruv.y = clamp(Hruv.y + (InputJitter.y * renderSizeRcp.y), 0.0, 1.0); int2 InputPos = int2(Jitteruv * InputInfoViewportSize); From 66059cb742b89f2601195247ea25a94f0ca49cf0 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 19:09:29 +0100 Subject: [PATCH 29/88] Imported ARM ASR shader code for HLSL without any modifications --- .../Runtime/Effects/Upscaling/ASR.meta | 8 + .../Effects/Upscaling/ASR/Shaders.meta | 8 + .../Upscaling/ASR/Shaders/shaders.meta | 8 + .../ASR/Shaders/shaders/ffxm_common_types.h | 526 +++ .../Shaders/shaders/ffxm_common_types.h.meta | 67 + .../Upscaling/ASR/Shaders/shaders/ffxm_core.h | 69 + .../ASR/Shaders/shaders/ffxm_core.h.meta | 67 + .../ASR/Shaders/shaders/ffxm_core_cpu.h | 337 ++ .../ASR/Shaders/shaders/ffxm_core_cpu.h.meta | 67 + .../Shaders/shaders/ffxm_core_gpu_common.h | 2812 ++++++++++++++++ .../shaders/ffxm_core_gpu_common.h.meta | 67 + .../shaders/ffxm_core_gpu_common_half.h | 2978 +++++++++++++++++ .../shaders/ffxm_core_gpu_common_half.h.meta | 67 + .../ASR/Shaders/shaders/ffxm_core_hlsl.h | 1643 +++++++++ .../ASR/Shaders/shaders/ffxm_core_hlsl.h.meta | 67 + .../Shaders/shaders/ffxm_core_portability.h | 50 + .../shaders/ffxm_core_portability.h.meta | 67 + 
.../shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 103 + .../ffxm_fsr2_accumulate_pass_fs.hlsl.meta | 7 + .../ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 83 + ...xm_fsr2_autogen_reactive_pass_fs.hlsl.meta | 7 + ...m_fsr2_compute_luminance_pyramid_pass.hlsl | 66 + ...2_compute_luminance_pyramid_pass.hlsl.meta | 7 + .../shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 71 + .../ffxm_fsr2_depth_clip_pass_fs.hlsl.meta | 7 + .../Shaders/shaders/ffxm_fsr2_lock_pass.hlsl | 66 + .../shaders/ffxm_fsr2_lock_pass.hlsl.meta | 7 + .../shaders/ffxm_fsr2_rcas_pass_fs.hlsl | 60 + .../shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta | 7 + ...r2_reconstruct_previous_depth_pass_fs.hlsl | 69 + ...construct_previous_depth_pass_fs.hlsl.meta | 7 + .../ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl | 50 + .../Shaders/shaders/ffxm_fsr2_vs.hlsl.meta | 7 + .../Upscaling/ASR/Shaders/shaders/fsr1.meta | 8 + .../ASR/Shaders/shaders/fsr1/ffxm_fsr1.h | 1251 +++++++ .../ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta | 76 + .../Upscaling/ASR/Shaders/shaders/fsr2.meta | 8 + .../shaders/fsr2/ffxm_fsr2_accumulate.h | 380 +++ .../shaders/fsr2/ffxm_fsr2_accumulate.h.meta | 67 + .../shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 1014 ++++++ .../fsr2/ffxm_fsr2_callbacks_hlsl.h.meta | 67 + .../Shaders/shaders/fsr2/ffxm_fsr2_common.h | 595 ++++ .../shaders/fsr2/ffxm_fsr2_common.h.meta | 67 + .../ffxm_fsr2_compute_luminance_pyramid.h | 211 ++ ...ffxm_fsr2_compute_luminance_pyramid.h.meta | 67 + .../shaders/fsr2/ffxm_fsr2_depth_clip.h | 349 ++ .../shaders/fsr2/ffxm_fsr2_depth_clip.h.meta | 67 + .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h | 131 + .../shaders/fsr2/ffxm_fsr2_lock.h.meta | 67 + .../fsr2/ffxm_fsr2_postprocess_lock_status.h | 101 + .../ffxm_fsr2_postprocess_lock_status.h.meta | 67 + .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h | 91 + .../shaders/fsr2/ffxm_fsr2_rcas.h.meta | 67 + ...ruct_dilated_velocity_and_previous_depth.h | 155 + ...dilated_velocity_and_previous_depth.h.meta | 67 + .../shaders/fsr2/ffxm_fsr2_reproject.h | 386 +++ 
.../shaders/fsr2/ffxm_fsr2_reproject.h.meta | 67 + .../shaders/fsr2/ffxm_fsr2_resources.h | 100 + .../shaders/fsr2/ffxm_fsr2_resources.h.meta | 67 + .../Shaders/shaders/fsr2/ffxm_fsr2_sample.h | 699 ++++ .../shaders/fsr2/ffxm_fsr2_sample.h.meta | 67 + .../Shaders/shaders/fsr2/ffxm_fsr2_upsample.h | 195 ++ .../shaders/fsr2/ffxm_fsr2_upsample.h.meta | 67 + .../Upscaling/ASR/Shaders/shaders/spd.meta | 8 + .../ASR/Shaders/shaders/spd/ffxm_spd.h | 1013 ++++++ .../ASR/Shaders/shaders/spd/ffxm_spd.h.meta | 76 + 66 files changed, 17250 insertions(+) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h create mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta create mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta create mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta create mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta create mode 100644 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta new file mode 100644 index 0000000..1c71416 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: fbb474d5e9430814eb7b83620c3d4189 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta new file mode 100644 index 0000000..57ef699 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: d231e3fb22497e3448f149c48709d08d +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta new file mode 100644 index 0000000..806a4d1 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: f7cc575273c4b124596cac0be2abd8ff +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h new file mode 100644 index 0000000..fb5e13a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h @@ -0,0 +1,526 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef FFXM_COMMON_TYPES_H +#define FFXM_COMMON_TYPES_H + +#if defined(FFXM_CPU) +#define FFXM_PARAMETER_IN +#define FFXM_PARAMETER_OUT +#define FFXM_PARAMETER_INOUT +#define FFXM_PARAMETER_UNIFORM +#elif defined(FFXM_HLSL) +#define FFXM_PARAMETER_IN in +#define FFXM_PARAMETER_OUT out +#define FFXM_PARAMETER_INOUT inout +#define FFXM_PARAMETER_UNIFORM uniform +#elif defined(FFXM_GLSL) +#define FFXM_PARAMETER_IN in +#define FFXM_PARAMETER_OUT out +#define FFXM_PARAMETER_INOUT inout +#define FFXM_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! +#endif // #if defined(FFXM_CPU) + +#if defined(FFXM_CPU) +/// A typedef for a boolean value. +/// +/// @ingroup CPUTypes +typedef bool FfxBoolean; + +/// A typedef for a unsigned 8bit integer. +/// +/// @ingroup CPUTypes +typedef uint8_t FfxUInt8; + +/// A typedef for a unsigned 16bit integer. +/// +/// @ingroup CPUTypes +typedef uint16_t FfxUInt16; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32; + +/// A typedef for a unsigned 64bit integer. +/// +/// @ingroup CPUTypes +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPUTypes +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPUTypes +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. +/// +/// @ingroup CPUTypes +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. 
+/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4[4]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x4[4]; +#endif // #if defined(FFXM_CPU) + +#if defined(FFXM_HLSL) + +// Unless defined, go for the conservative option. +#if !defined(FFXM_HLSL_6_2) +#define FFXM_HLSL_6_2 (0) +#endif + +#define FfxFloat32Mat4 matrix +#define FfxFloat32Mat3 matrix + +/// A typedef for a boolean value. +/// +/// @ingroup HLSLTypes +typedef bool FfxBoolean; + +#if FFXM_HLSL_6_2 + +/// @defgroup HLSL62Types HLSL 6.2 And Above Types +/// HLSL 6.2 and above type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +/// A typedef for a floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t2 FfxFloat32x2; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t3 FfxFloat32x3; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t4 FfxFloat32x4; + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +typedef float4x4 FfxFloat32x4x4; +typedef float3x3 FfxFloat32x3x3; +typedef float2x2 FfxFloat32x2x2; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t FfxUInt32; + +/// A typedef for a 2-dimensional 32bit unsigned integer. 
+/// +/// @ingroup HLSL62Types +typedef uint32_t2 FfxUInt32x2; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t3 FfxUInt32x3; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t4 FfxUInt32x4; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t FfxInt32; + +/// A typedef for a 2-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t2 FfxInt32x2; + +/// A typedef for a 3-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t3 FfxInt32x3; + +/// A typedef for a 4-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t4 FfxInt32x4; + +#else // #if defined(FFXM_HLSL_6_2) + +/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types +/// HLSL 6.1 and below type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +#define FfxFloat32 float +#define FfxFloat32x2 float2 +#define FfxFloat32x3 float3 +#define FfxFloat32x4 float4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x3 float3x3 +#define FfxFloat32x2x2 float2x2 + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +typedef uint FfxUInt32; +typedef uint2 FfxUInt32x2; +typedef uint3 FfxUInt32x3; +typedef uint4 FfxUInt32x4; + +typedef int FfxInt32; +typedef int2 FfxInt32x2; +typedef int3 FfxInt32x3; +typedef int4 FfxInt32x4; + +#endif // #if defined(FFXM_HLSL_6_2) + +// Arm ASR relies in efficient FP16 arithmetic. +#if !defined(FFXM_HALF) +#define FFXM_HALF (1) +#endif + +#if FFXM_HALF + +#if FFXM_HLSL_6_2 + +typedef float16_t FfxFloat16; +typedef float16_t2 FfxFloat16x2; +typedef float16_t3 FfxFloat16x3; +typedef float16_t4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. 
+/// +/// @ingroup HLSLTypes +typedef uint16_t FfxUInt16; +typedef uint16_t2 FfxUInt16x2; +typedef uint16_t3 FfxUInt16x3; +typedef uint16_t4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef int16_t FfxInt16; +typedef int16_t2 FfxInt16x2; +typedef int16_t3 FfxInt16x3; +typedef int16_t4 FfxInt16x4; +#else // #if FFXM_HLSL_6_2 +typedef min16float FfxFloat16; +typedef min16float2 FfxFloat16x2; +typedef min16float3 FfxFloat16x3; +typedef min16float4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16uint FfxUInt16; +typedef min16uint2 FfxUInt16x2; +typedef min16uint3 FfxUInt16x3; +typedef min16uint4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16int FfxInt16; +typedef min16int2 FfxInt16x2; +typedef min16int3 FfxInt16x3; +typedef min16int4 FfxInt16x4; +#endif // #if FFXM_HLSL_6_2 + +#endif // FFXM_HALF + +#endif // #if defined(FFXM_HLSL) + +#if defined(FFXM_GLSL) + +#define FfxFloat32Mat4 mat4 +#define FfxFloat32Mat3 mat3 + +/// A typedef for a boolean value. +/// +/// @ingroup GLSLTypes +#define FfxBoolean bool +#define FfxFloat32 float +#define FfxFloat32x2 vec2 +#define FfxFloat32x3 vec3 +#define FfxFloat32x4 vec4 +#define FfxUInt32 uint +#define FfxUInt32x2 uvec2 +#define FfxUInt32x3 uvec3 +#define FfxUInt32x4 uvec4 +#define FfxInt32 int +#define FfxInt32x2 ivec2 +#define FfxInt32x3 ivec3 +#define FfxInt32x4 ivec4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. 
+#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x3 mat3 +#define FfxFloat32x2x2 mat2 + +#if FFXM_HALF +#define FfxFloat16 float16_t +#define FfxFloat16x2 f16vec2 +#define FfxFloat16x3 f16vec3 +#define FfxFloat16x4 f16vec4 +#define FfxUInt16 uint16_t +#define FfxUInt16x2 u16vec2 +#define FfxUInt16x3 u16vec3 +#define FfxUInt16x4 u16vec4 +#define FfxInt16 int16_t +#define FfxInt16x2 i16vec2 +#define FfxInt16x3 i16vec3 +#define FfxInt16x4 i16vec4 +#endif // FFXM_HALF +#endif // #if defined(FFXM_GLSL) + + +#if FFXM_HALF + +#if FFXM_HLSL_6_2 + +#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName; +#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName; + +#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName; +#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName; + +#else //FFXM_HLSL_6_2 + +#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; +#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<min16##BaseComponentType, COL> TypeName; +#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName; + +#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) FFXM_MIN16_SCALAR( TypeName, BaseComponentType ); +#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ); +#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); + +#endif //FFXM_HLSL_6_2 + +#else //FFXM_HALF + +#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL>
TypeName; +#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName; + +#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName; +#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName; + +#endif //FFXM_HALF + +#if defined(FFXM_GPU) +// Common typedefs: +#if defined(FFXM_HLSL) +FFXM_MIN16_SCALAR( FFXM_MIN16_F , float ); +FFXM_MIN16_VECTOR( FFXM_MIN16_F2, float, 2 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_F3, float, 3 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_F4, float, 4 ); + +FFXM_MIN16_SCALAR( FFXM_MIN16_I, int ); +FFXM_MIN16_VECTOR( FFXM_MIN16_I2, int, 2 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_I3, int, 3 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_I4, int, 4 ); + +FFXM_MIN16_SCALAR( FFXM_MIN16_U, uint ); +FFXM_MIN16_VECTOR( FFXM_MIN16_U2, uint, 2 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_U3, uint, 3 ); +FFXM_MIN16_VECTOR( FFXM_MIN16_U4, uint, 4 ); + +FFXM_16BIT_SCALAR( FFXM_F16_t , float ); +FFXM_16BIT_VECTOR( FFXM_F16_t2, float, 2 ); +FFXM_16BIT_VECTOR( FFXM_F16_t3, float, 3 ); +FFXM_16BIT_VECTOR( FFXM_F16_t4, float, 4 ); + +FFXM_16BIT_SCALAR( FFXM_I16_t, int ); +FFXM_16BIT_VECTOR( FFXM_I16_t2, int, 2 ); +FFXM_16BIT_VECTOR( FFXM_I16_t3, int, 3 ); +FFXM_16BIT_VECTOR( FFXM_I16_t4, int, 4 ); + +FFXM_16BIT_SCALAR( FFXM_U16_t, uint ); +FFXM_16BIT_VECTOR( FFXM_U16_t2, uint, 2 ); +FFXM_16BIT_VECTOR( FFXM_U16_t3, uint, 3 ); +FFXM_16BIT_VECTOR( FFXM_U16_t4, uint, 4 ); + +#define TYPEDEF_MIN16_TYPES(Prefix) \ +typedef FFXM_MIN16_F Prefix##_F; \ +typedef FFXM_MIN16_F2 Prefix##_F2; \ +typedef FFXM_MIN16_F3 Prefix##_F3; \ +typedef FFXM_MIN16_F4 Prefix##_F4; \ +typedef FFXM_MIN16_I Prefix##_I; \ +typedef FFXM_MIN16_I2 Prefix##_I2; \ +typedef FFXM_MIN16_I3 Prefix##_I3; \ +typedef FFXM_MIN16_I4 Prefix##_I4; \ +typedef FFXM_MIN16_U Prefix##_U; \ +typedef FFXM_MIN16_U2 Prefix##_U2; \ +typedef FFXM_MIN16_U3 Prefix##_U3; \
+typedef FFXM_MIN16_U4 Prefix##_U4; + +#define TYPEDEF_16BIT_TYPES(Prefix) \ +typedef FFXM_16BIT_F Prefix##_F; \ +typedef FFXM_16BIT_F2 Prefix##_F2; \ +typedef FFXM_16BIT_F3 Prefix##_F3; \ +typedef FFXM_16BIT_F4 Prefix##_F4; \ +typedef FFXM_16BIT_I Prefix##_I; \ +typedef FFXM_16BIT_I2 Prefix##_I2; \ +typedef FFXM_16BIT_I3 Prefix##_I3; \ +typedef FFXM_16BIT_I4 Prefix##_I4; \ +typedef FFXM_16BIT_U Prefix##_U; \ +typedef FFXM_16BIT_U2 Prefix##_U2; \ +typedef FFXM_16BIT_U3 Prefix##_U3; \ +typedef FFXM_16BIT_U4 Prefix##_U4; + +#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ +typedef FfxFloat32 Prefix##_F; \ +typedef FfxFloat32x2 Prefix##_F2; \ +typedef FfxFloat32x3 Prefix##_F3; \ +typedef FfxFloat32x4 Prefix##_F4; \ +typedef FfxInt32 Prefix##_I; \ +typedef FfxInt32x2 Prefix##_I2; \ +typedef FfxInt32x3 Prefix##_I3; \ +typedef FfxInt32x4 Prefix##_I4; \ +typedef FfxUInt32 Prefix##_U; \ +typedef FfxUInt32x2 Prefix##_U2; \ +typedef FfxUInt32x3 Prefix##_U3; \ +typedef FfxUInt32x4 Prefix##_U4; +#endif // #if defined(FFXM_HLSL) + +#if defined(FFXM_GLSL) + +#if FFXM_HALF + +#define FFXM_MIN16_F float16_t +#define FFXM_MIN16_F2 f16vec2 +#define FFXM_MIN16_F3 f16vec3 +#define FFXM_MIN16_F4 f16vec4 + +#define FFXM_MIN16_I int16_t +#define FFXM_MIN16_I2 i16vec2 +#define FFXM_MIN16_I3 i16vec3 +#define FFXM_MIN16_I4 i16vec4 + +#define FFXM_MIN16_U uint16_t +#define FFXM_MIN16_U2 u16vec2 +#define FFXM_MIN16_U3 u16vec3 +#define FFXM_MIN16_U4 u16vec4 + +#define FFXM_16BIT_F float16_t +#define FFXM_16BIT_F2 f16vec2 +#define FFXM_16BIT_F3 f16vec3 +#define FFXM_16BIT_F4 f16vec4 + +#define FFXM_16BIT_I int16_t +#define FFXM_16BIT_I2 i16vec2 +#define FFXM_16BIT_I3 i16vec3 +#define FFXM_16BIT_I4 i16vec4 + +#define FFXM_16BIT_U uint16_t +#define FFXM_16BIT_U2 u16vec2 +#define FFXM_16BIT_U3 u16vec3 +#define FFXM_16BIT_U4 u16vec4 + +#else // FFXM_HALF + +#define FFXM_MIN16_F float +#define FFXM_MIN16_F2 vec2 +#define FFXM_MIN16_F3 vec3 +#define FFXM_MIN16_F4 vec4 + +#define FFXM_MIN16_I int 
+#define FFXM_MIN16_I2 ivec2 +#define FFXM_MIN16_I3 ivec3 +#define FFXM_MIN16_I4 ivec4 + +#define FFXM_MIN16_U uint +#define FFXM_MIN16_U2 uvec2 +#define FFXM_MIN16_U3 uvec3 +#define FFXM_MIN16_U4 uvec4 + +#define FFXM_16BIT_F float +#define FFXM_16BIT_F2 vec2 +#define FFXM_16BIT_F3 vec3 +#define FFXM_16BIT_F4 vec4 + +#define FFXM_16BIT_I int +#define FFXM_16BIT_I2 ivec2 +#define FFXM_16BIT_I3 ivec3 +#define FFXM_16BIT_I4 ivec4 + +#define FFXM_16BIT_U uint +#define FFXM_16BIT_U2 uvec2 +#define FFXM_16BIT_U3 uvec3 +#define FFXM_16BIT_U4 uvec4 + +#endif // FFXM_HALF + +#endif // #if defined(FFXM_GLSL) + +#endif // #if defined(FFXM_GPU) +#endif // #ifndef FFXM_COMMON_TYPES_H diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta new file mode 100644 index 0000000..12003b2 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 86143f20804e7ad40af9d5e4bb7038f6 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: 
OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h new file mode 100644 index 0000000..ee924e4 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h @@ -0,0 +1,69 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +/// @defgroup FfxGPU GPU +/// The FidelityFX SDK GPU References +/// +/// @ingroup ffxSDK + +/// @defgroup FfxHLSL HLSL References +/// FidelityFX SDK HLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGLSL GLSL References +/// FidelityFX SDK GLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGPUEffects FidelityFX GPU References +/// FidelityFX Effect GPU Reference Documentation +/// +/// @ingroup FfxGPU + +/// @defgroup GPUCore GPU Core +/// GPU defines and functions +/// +/// @ingroup FfxGPU + +#if !defined(FFXM_CORE_H) +#define FFXM_CORE_H + +#include "ffxm_common_types.h" + +#if defined(FFXM_CPU) +#include "ffxm_core_cpu.h" +#endif // #if defined(FFXM_CPU) + +#if defined(FFXM_GLSL) && defined(FFXM_GPU) +#include "ffxm_core_glsl.h" +#endif // #if defined(FFXM_GLSL) && defined(FFXM_GPU) + +#if defined(FFXM_HLSL) && defined(FFXM_GPU) +#include "ffxm_core_hlsl.h" +#endif // #if defined(FFXM_HLSL) && defined(FFXM_GPU) + +#if defined(FFXM_GPU) +#include "ffxm_core_gpu_common.h" +#include "ffxm_core_gpu_common_half.h" +#include "ffxm_core_portability.h" +#endif // #if defined(FFXM_GPU) +#endif // #if !defined(FFXM_CORE_H) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta new file mode 100644 index 0000000..90bce22 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 1de7e5f01f4c625458dbda94917d9aa1 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 
1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h new file mode 100644 index 0000000..e32dbd3 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h @@ -0,0 +1,337 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/// A define for a true value in a boolean expression. +/// +/// @ingroup CPUTypes +#define FFXM_TRUE (1) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup CPUTypes +#define FFXM_FALSE (0) + +#if !defined(FFXM_STATIC) +/// A define to abstract declaration of static variables and functions. +/// +/// @ingroup CPUTypes +#define FFXM_STATIC static +#endif // #if !defined(FFXM_STATIC) + +/// @defgroup CPUCore CPU Core +/// Core CPU-side defines and functions +/// +/// @ingroup ffxHost + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-variable" +#endif + +/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. +/// +/// @param [in] x A 32bit floating value. +/// +/// @returns +/// An unsigned 32bit integer value containing the bit pattern of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = x; + return bits.u; +} + +FFXM_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return a[0] * b[0] + a[1] * b[1]; +} + +FFXM_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} + +FFXM_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; +} + +/// Compute the linear interpolation between two values. +/// +/// Computed directly on the CPU; mirrors the GLSL mix intrinsic function.
Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return y * t + (-x * t + x); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return 1.0f / x; +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +FFXM_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the reciprocal square root of a value. +/// +/// @param [in] x The value to compute the reciprocal square root for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 rsqrt(FfxFloat32 x) +{ + return ffxReciprocal(ffxSqrt(x)); +} + +FFXM_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return x < y ? x : y; +} + +FFXM_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return x < y ? x : y; +} + +FFXM_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return x > y ?
x : y; +} + +FFXM_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return x > y ? x : y; +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return ffxMin(1.0f, ffxMax(0.0f, x)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +FFXM_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] + b; + d[1] = a[1] + b; + d[2] = a[2] + b; + return; +} + +FFXM_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + return; +} + +FFXM_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d[0] = a[0] * b[0]; + d[1] = a[1] * b[1]; + d[2] = a[2] * b[2]; + return; +} + +FFXM_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] * b; + d[1] = a[1] * b; + d[2] = a[2] * b; + return; +} + +FFXM_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = ffxReciprocal(a[0]); + d[1] = ffxReciprocal(a[1]); + d[2] = ffxReciprocal(a[2]); + return; +} + +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. 
+/// +/// @ingroup CPUCore +FFXM_STATIC FfxUInt32 f32tof16(FfxFloat32 f) +{ + static FfxUInt16 base[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, + 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, + 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 
0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, + 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, + 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 
0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff + }; + + static FfxUInt8 shift[512] = { + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + }; + + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = f; + FfxUInt32 u = bits.u; + FfxUInt32 i = u >> 23; + return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); +} + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. 
+/// +/// @param [in] x A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup CPUCore +FFXM_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 x) +{ + return f32tof16(x[0]) + (f32tof16(x[1]) << 16); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta new file mode 100644 index 0000000..b8f442f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 94fb3b7a7fde2f7448c52c5c262f5c01 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h new file mode 100644 index 0000000..e8df503 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h @@ -0,0 +1,2812 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/// Shader quality related defines +/// +/// FFXM_FSR2_OPTION_SHADER_OPT_BALANCED. If defined, optimizations related to the balanced preset will be enabled. +/// FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE. If defined, optimizations related to the performance preset will be enabled. 
+#ifndef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE +#define FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE 0 +#endif +#ifndef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED +#define FFXM_FSR2_OPTION_SHADER_OPT_BALANCED 0 +#endif +/// FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE. Helper to identify if any of these profiles is used. +#define FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE (FFXM_FSR2_OPTION_SHADER_OPT_BALANCED || FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE) + +/// Both Balanced/Performance. Keep the temporal reactive as a separate RT to improve bandwidth of color history buffer. +#define FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE +/// Both Balanced/Performance. Disable deringing when doing the color reprojection with the history +#define FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE +/// Both Balanced/Performance. Disable the Luma stability factor +#define FFXM_SHADER_QUALITY_OPT_DISABLE_LUMA_INSTABILITY FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE +/// Both Balanced/Performance. Use a 5-tap Lanczos kernel instead of the 9-tap used for `Quality` for upsampling +#define FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE +/// Balanced. Use Catmull-Rom (9 samples) for history reprojection +#define FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_9TAP FFXM_FSR2_OPTION_SHADER_OPT_BALANCED +/// Performance. PreparedInputColor is now stored as R8G8B8A8_Unorm tonemapped data. Rectification don't use `YCoCg` anymore +#define FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE +/// Performance. Use Catmull-Rom (5 samples) for history reprojection +#define FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_5TAP FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE + +#if !defined(FFXM_SHADER_PLATFORM_GLES_3_2) +#define FFXM_SHADER_PLATFORM_GLES_3_2 (0) +#endif + +/// A define for a true value in a boolean expression. 
+/// +/// @ingroup GPUCore +#define FFXM_TRUE (true) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFXM_FALSE (false) + +/// A define value for positive infinity. +/// +/// @ingroup GPUCore +#define FFXM_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) + +/// A define value for negative infinity. +/// +/// @ingroup GPUCore +#define FFXM_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) + +/// A define value for PI (truncated to six significant digits). +/// +/// @ingroup GPUCore +#define FFXM_PI (3.14159) + + +/// Compute the reciprocal of a value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The result of 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxReciprocal(FfxFloat32 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The result of 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The result of 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The result of 1 / value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) +{ + return rcp(value); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of.
+/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. 
+/// +/// @ingroup GPUCore +FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. 
+/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsSigned(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. 
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFXM_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFXM_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. 
+/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFXM_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFXM_POSITIVE_INFINITY_FLOAT)); +} + +/// Convert a 32bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] value The floating point value to make sortable, passed as its 32-bit pattern. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) +{ + return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Convert a sortable integer to a 32bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] value The sortable integer value to convert back. +/// +/// @returns +/// The 32-bit pattern of the corresponding floating point value. 
+/// +/// @ingroup GPUCore +FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) +{ + return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) +{ + FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); + return b * (-b * value + FfxFloat32(2.0)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); +} + +/// Calculate a low-quality approximation for the square root of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. 
+/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) +{ + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast2(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) +{ + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast3(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. 
+/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) +{ + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast4(2.0f)); +} + +/// Calculate a low-quality approximation for the reciprocal square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. 
+/// +/// @ingroup GPUCore +FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return dot(a, b); +} + + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. 
+/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); + FfxFloat32 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. 
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
+{
+    FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); // Same estimate as ffxApproximateLinearToPQ.
+    FfxFloat32 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 * b^7): one Newton-Raphson step for b^8 == a.
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space (ffxSqrt applied three times).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space (each component raised to the 4th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space (each component raised to the 8th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space (fast 4th-root estimate: ffxAsUInt32(a) shifted right by 2 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); // Same estimate as ffxApproximateGamma2ToPQ.
+    FfxFloat32x2 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // Equals b - (b^4 - a) / (4 * b^3): one Newton-Raphson step for b^4 == a.
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space (ffxSqrt applied twice).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space (fast 8th-root estimate: ffxAsUInt32(a) shifted right by 3 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); // Same estimate as ffxApproximateLinearToPQ.
+    FfxFloat32x2 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 * b^7): one Newton-Raphson step for b^8 == a.
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space (ffxSqrt applied three times).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space (each component raised to the 4th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space (each component raised to the 8th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space (fast 4th-root estimate: ffxAsUInt32(a) shifted right by 2 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); // Same estimate as ffxApproximateGamma2ToPQ.
+    FfxFloat32x3 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // Equals b - (b^4 - a) / (4 * b^3): one Newton-Raphson step for b^4 == a.
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space (ffxSqrt applied twice).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space (fast 8th-root estimate: ffxAsUInt32(a) shifted right by 3 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); // Same estimate as ffxApproximateLinearToPQ.
+    FfxFloat32x3 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 * b^7): one Newton-Raphson step for b^8 == a.
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space (ffxSqrt applied three times).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space (each component raised to the 4th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space (each component raised to the 8th power).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space (fast 4th-root estimate: ffxAsUInt32(a) shifted right by 2 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); // Same estimate as ffxApproximateGamma2ToPQ.
+    FfxFloat32x4 b4 = b * b * b * b; // Must be b^4 (not b^8) for a 4th-root refinement, matching the scalar, x2 and x3 overloads.
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // Equals b - (b^4 - a) / (4 * b^3): one Newton-Raphson step for b^4 == a.
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space (ffxSqrt applied twice).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space (fast 8th-root estimate: ffxAsUInt32(a) shifted right by 3 plus a bias constant).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space (fast estimate plus one Newton-Raphson step).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); // Same estimate as ffxApproximateLinearToPQ.
+    FfxFloat32x4 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); // Equals b - (b^8 - a) / (8 * b^7): one Newton-Raphson step for b^8 == a.
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space (ffxSqrt applied three times).
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+// An approximation of sine, computed as value * abs(value) - value.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] value The value to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of value.
+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
+{
+    return value * abs(value) - value;
+}
+
+// An approximation of sine, applied to each component of a two-component vector.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of each component of x.
+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
+{
+    return x * abs(x) - x;
+}
+
+// An approximation of cosine, built on ffxParabolicSin.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of x.
+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
+{
+    x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // Offset the input and keep only the fractional part.
+    x = x * FfxFloat32(2.0) - FfxFloat32(1.0); // Rescale as 2 * x - 1 before reusing the sine approximation.
+    return ffxParabolicSin(x);
+}
+
+// An approximation of cosine, applied to each component of a two-component vector.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of each component of x.
+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
+{
+    x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); // Offset the input and keep only the fractional part.
+    x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); // Rescale as 2 * x - 1 before reusing the sine approximation.
+    return ffxParabolicSin(x);
+}
+
+// An approximation of both sine and cosine.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate sine and cosine for.
+//
+// @returns
+// A FfxFloat32x2 containing approximations of both sine (first component) and cosine (second component) of x.
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
+{
+    FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // Same input remapping as ffxParabolicCos.
+    y = y * FfxFloat32(2.0) - FfxFloat32(1.0);
+    return ffxParabolicSin(FfxFloat32x2(x, y)); // One vector call evaluates both the sine input (x) and the cosine input (y).
+}
+
+/// Conditional free logic AND operation using two values (the minimum of x and y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values (the component-wise minimum of x and y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values (the component-wise minimum of x and y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation using two values (the component-wise minimum of x and y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic NOT operation on a single value (despite the ffxZeroOneAnd name, this overload computes x ^ 1).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
+{
+    return x ^ FfxUInt32(1);
+}
+
+/// Conditional free logic NOT operation on a single value (despite the ffxZeroOneAnd name, this overload computes x ^ 1 per component).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
+{
+    return x ^ ffxBroadcast2(1u);
+}
+
+/// Conditional free logic NOT operation on a single value (despite the ffxZeroOneAnd name, this overload computes x ^ 1 per component).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
+{
+    return x ^ ffxBroadcast3(1u);
+}
+
+/// Conditional free logic NOT operation on a single value (despite the ffxZeroOneAnd name, this overload computes x ^ 1 per component).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
+{
+    return x ^ ffxBroadcast4(1u);
+}
+
+/// Conditional free logic OR operation using two values (the maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32 value (1.0 - x), with the result converted to FfxUInt32.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as an unsigned integer.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
+{
+    return FfxUInt32(FfxFloat32(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x2 value (1.0 - x), with the result converted to FfxUInt32x2.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
+{
+    return FfxUInt32x2(ffxBroadcast2(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x3 value (1.0 - x), with the result converted to FfxUInt32x3.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
+{
+    return FfxUInt32x3(ffxBroadcast3(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x4 value (1.0 - x), with the result converted to FfxUInt32x4.
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
+{
+    return FfxUInt32x4(ffxBroadcast4(1.0) - x);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value, computed as ffxSaturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value, computed as ffxSaturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value, computed as ffxSaturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value, computed as ffxSaturate(x * y + z).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not (ffxSaturate of x times positive infinity).
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
+{
+    return ffxSaturate(x * FfxFloat32(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not, per component (ffxSaturate of x times positive infinity).
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
+{
+    return ffxSaturate(x * ffxBroadcast2(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not, per component (ffxSaturate of x times positive infinity).
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
+{
+    return ffxSaturate(x * ffxBroadcast3(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not, per component (ffxSaturate of x times positive infinity).
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
+{
+    return ffxSaturate(x * ffxBroadcast4(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32 value (despite the ffxZeroOneAnd name, this overload computes 1.0 - x).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x2 value (despite the ffxZeroOneAnd name, this overload computes 1.0 - x).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x3 value (despite the ffxZeroOneAnd name, this overload computes 1.0 - x).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x4 value (despite the ffxZeroOneAnd name, this overload computes 1.0 - x).
+///
+/// @param [in] x The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
+{
+    return ffxBroadcast4(1.0) - x;
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values (the maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x2 values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x3 values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x4 values (the component-wise maximum of x and y).
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return max(x, y);
+}
+
+/// Choose between two FfxFloat32 values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 r = (-x) * z + z; // z when x is 0, 0 when x is 1.
+    return x * y + r; // NOTE(review): branch-free blend; presumably x is expected to be exactly 0 or 1 - confirm with callers.
+}
+
+/// Choose between two FfxFloat32x2 values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 r = (-x) * z + z; // z where x is 0, 0 where x is 1.
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32x3 values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x The value to compare against zero.
+/// @param [in] y The value to return if the comparison is greater than zero.
+/// @param [in] z The value to return if the comparison is less than or equal to zero.
+/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + FfxFloat32x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32x4 values with a zero-one selector, computing x * y + (1 - x) * z. +/// +/// @param [in] x The selector value, intended to be 0.0 or 1.0. +/// @param [in] y The value to return if the selector is 1.0. +/// @param [in] z The value to return if the selector is 0.0. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + FfxFloat32x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. 
+/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFXM_NEGATIVE_INFINITY_FLOAT)); +} + +/// Convert a linear color value to Rec.709. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); +} + +/// Convert a linear color value to Rec.709. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); +} + +/// Convert a linear color value to Rec.709. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) 
For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power) +{ + return pow(value, FfxFloat32(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast2(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. 
+/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast3(power)); +} + +/// Compute a PQ value from a linear value (despite its name, this encodes linear to PQ). +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPQToLinear(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); +} + +/// Compute a PQ value from a linear value (despite its name, this encodes linear to PQ). +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); +} + +/// Compute a PQ value from a linear value (despite its name, this encodes linear to PQ). +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in PQ space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); +} + +/// Compute a SRGB value from a linear value (despite its name, this encodes linear to SRGB). +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. 
+/// +/// @ingroup GPUCore +FfxFloat32 ffxSrgbToLinear(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); +} + +/// Compute a SRGB value from a linear value (despite its name, this encodes linear to SRGB). +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); +} + +/// Compute a SRGB value from a linear value (despite its name, this encodes linear to SRGB). +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +{ + return pow(color, FfxFloat32(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast2(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. 
+/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast3(power)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Counterpart of ffxPQToLinear, which performs the linear-to-PQ encoding. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Counterpart of ffxPQToLinear, which performs the linear-to-PQ encoding. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Counterpart of ffxPQToLinear, which performs the linear-to-PQ encoding. +/// +/// @param [in] value The PQ-space value to convert to linear. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Counterpart of ffxSrgbToLinear, which performs the linear-to-SRGB encoding. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Counterpart of ffxSrgbToLinear, which performs the linear-to-SRGB encoding. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Counterpart of ffxSrgbToLinear, which performs the linear-to-SRGB encoding. +/// +/// @param [in] value The SRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. 
+/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta new file mode 100644 index 0000000..e85b1e0 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 049e52a8031c0c44f9c2b503e90b844e +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h new file mode 100644 index 0000000..3f8ac4b --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h @@ -0,0 +1,2978 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#if FFXM_HALF +#if FFXM_HLSL_6_2 +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPUCore +#define FFXM_POSITIVE_INFINITY_HALF FFXM_TO_FLOAT16((uint16_t)0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFXM_NEGATIVE_INFINITY_HALF FFXM_TO_FLOAT16((uint16_t)0xfc00u) +#else +/// A define value for 16bit positive infinity. 
+/// +/// @ingroup GPUCore +#define FFXM_POSITIVE_INFINITY_HALF FFXM_TO_FLOAT16(0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFXM_NEGATIVE_INFINITY_HALF FFXM_TO_FLOAT16(0xfc00u) +#endif // FFXM_HLSL_6_2 + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. 
+/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The lowest of the two values. 
+/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The highest of the two values. 
+/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSqrt(FfxFloat16 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. 
+/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The value to compute the square root of. +/// +/// @returns +/// The square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) +{ + return FFXM_TO_FLOAT16(FFXM_TO_UINT16(d) | (FFXM_TO_UINT16(s) & FFXM_BROADCAST_UINT16(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) +{ + return FFXM_TO_FLOAT16X2(FFXM_TO_UINT16X2(d) | (FFXM_TO_UINT16X2(s) & FFXM_BROADCAST_UINT16X2(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) +{ + return FFXM_TO_FLOAT16X3(FFXM_TO_UINT16X3(d) | (FFXM_TO_UINT16X3(s) & FFXM_BROADCAST_UINT16X3(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) +{ + return FFXM_TO_FLOAT16X4(FFXM_TO_UINT16X4(d) | (FFXM_TO_UINT16X4(s) & FFXM_BROADCAST_UINT16X4(0x8000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. 
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X2(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X3(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X4(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. 
+/// +/// @ingroup GPUCore +FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X2(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X3(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m > 0 := 1 +/// m <= 0 := 0 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X4(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// Convert a 16bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. 
+/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16(15))) | FFXM_BROADCAST_UINT16(0x8000)); +} + +/// Convert a sortable integer to a 16bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer value to make floating point. +/// +/// @returns +/// The floating point value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16(15))) | FFXM_BROADCAST_UINT16(0x8000)); +} + +/// Convert a pair of 16bit floating point values to a pair of sortable integers. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point values to make sortable. +/// +/// @returns +/// The sortable integer values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16X2(15))) | FFXM_BROADCAST_UINT16X2(0x8000)); +} + +/// Convert a pair of sortable integers to a pair of 16bit floating point values. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer values to make floating point. 
+/// +/// @returns +/// The floating point values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16X2(15))) | FFXM_BROADCAST_UINT16X2(0x8000)); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y0 [Zero] X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) +{ + return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y1 [Zero] X1 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y2 [Zero] X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y3 [Zero] X3 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. 
+/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) +{ + return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X0 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X2 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. 
+/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X0 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X0 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X2 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. 
+/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 X2 Y0 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 Y0 X2 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) +{ + return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt16x2 value. 
+/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x *= FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0); + y *= FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0); + return FFXM_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(x)), FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte0Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte1Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? 
+/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// r=ffxPermuteUByte2Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. 
+/// +/// r=ffxPermuteUByte3Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. 
+/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt16x2 value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x = x * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0); + y = y * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0); + return FFXM_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(x)), FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. 
+/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. 
+/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. 
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. 
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. 
+/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFXM_TO_FLOAT16X2(FFXM_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) +{ + return FFXM_TO_FLOAT16((FFXM_TO_UINT16(a) >> FFXM_BROADCAST_UINT16(1)) + FFXM_BROADCAST_UINT16(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. Computed with integer operations only: (FFXM_TO_UINT16X2(a) >> 1) + 0x1de2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) +{ + return FFXM_TO_FLOAT16X2((FFXM_TO_UINT16X2(a) >> FFXM_BROADCAST_UINT16X2(1)) + FFXM_BROADCAST_UINT16X2(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. Computed with integer operations only: (FFXM_TO_UINT16X3(a) >> 1) + 0x1de2. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) +{ + return FFXM_TO_FLOAT16X3((FFXM_TO_UINT16X3(a) >> FFXM_BROADCAST_UINT16X3(1)) + FFXM_BROADCAST_UINT16X3(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. Computed with an integer subtraction only: 0x7784 - FFXM_TO_UINT16(a). +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) +{ + return FFXM_TO_FLOAT16(FFXM_BROADCAST_UINT16(0x7784) - FFXM_TO_UINT16(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. Computed with an integer subtraction only: 0x7784 - FFXM_TO_UINT16X2(a). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) +{ + return FFXM_TO_FLOAT16X2(FFXM_BROADCAST_UINT16X2(0x7784) - FFXM_TO_UINT16X2(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. 
+/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. Computed with an integer subtraction only: 0x7784 - FFXM_TO_UINT16X3(a). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) +{ + return FFXM_TO_FLOAT16X3(FFXM_BROADCAST_UINT16X3(0x7784) - FFXM_TO_UINT16X3(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. Computed with an integer subtraction only: 0x7784 - FFXM_TO_UINT16X4(a). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) +{ + return FFXM_TO_FLOAT16X4(FFXM_BROADCAST_UINT16X4(0x7784) - FFXM_TO_UINT16X4(a)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. The integer estimate b = 0x778d - FFXM_TO_UINT16(a) is refined with one b * (-b * a + 2.0) step. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) +{ + FfxFloat16 b = FFXM_TO_FLOAT16(FFXM_BROADCAST_UINT16(0x778d) - FFXM_TO_UINT16(a)); + return b * (-b * a + FFXM_BROADCAST_FLOAT16(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. The integer estimate b = 0x778d - FFXM_TO_UINT16X2(a) is refined with one b * (-b * a + 2.0) step. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) +{ + FfxFloat16x2 b = FFXM_TO_FLOAT16X2(FFXM_BROADCAST_UINT16X2(0x778d) - FFXM_TO_UINT16X2(a)); + return b * (-b * a + FFXM_BROADCAST_FLOAT16X2(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. The integer estimate b = 0x778d - FFXM_TO_UINT16X3(a) is refined with one b * (-b * a + 2.0) step. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) +{ + FfxFloat16x3 b = FFXM_TO_FLOAT16X3(FFXM_BROADCAST_UINT16X3(0x778d) - FFXM_TO_UINT16X3(a)); + return b * (-b * a + FFXM_BROADCAST_FLOAT16X3(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. The integer estimate b = 0x778d - FFXM_TO_UINT16X4(a) is refined with one b * (-b * a + 2.0) step. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) +{ + FfxFloat16x4 b = FFXM_TO_FLOAT16X4(FFXM_BROADCAST_UINT16X4(0x778d) - FFXM_TO_UINT16X4(a)); + return b * (-b * a + FFXM_BROADCAST_FLOAT16X4(2.0)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. Computed with integer operations only: 0x59a3 - (FFXM_TO_UINT16(a) >> 1). +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) +{ + return FFXM_TO_FLOAT16(FFXM_BROADCAST_UINT16(0x59a3) - (FFXM_TO_UINT16(a) >> FFXM_BROADCAST_UINT16(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. Computed with integer operations only: 0x59a3 - (FFXM_TO_UINT16X2(a) >> 1). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) +{ + return FFXM_TO_FLOAT16X2(FFXM_BROADCAST_UINT16X2(0x59a3) - (FFXM_TO_UINT16X2(a) >> FFXM_BROADCAST_UINT16X2(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. Computed with integer operations only: 0x59a3 - (FFXM_TO_UINT16X3(a) >> 1). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) +{ + return FFXM_TO_FLOAT16X3(FFXM_BROADCAST_UINT16X3(0x59a3) - (FFXM_TO_UINT16X3(a) >> FFXM_BROADCAST_UINT16X3(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. 
+/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. Computed with integer operations only: 0x59a3 - (FFXM_TO_UINT16X4(a) >> 1). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) +{ + return FFXM_TO_FLOAT16X4(FFXM_BROADCAST_UINT16X4(0x59a3) - (FFXM_TO_UINT16X4(a) >> FFXM_BROADCAST_UINT16X4(1))); +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of x, computed as x * abs(x) - x. +FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x) +{ + return x * abs(x) - x; +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of x, computed per component as x * abs(x) - x. +FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x) +{ + return x * abs(x) - x; +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of x, obtained by remapping x and calling ffxParabolicSinHalf. 
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x) +{ + x = ffxFract(x * FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16(0.75)); + x = x * FFXM_BROADCAST_FLOAT16(2.0) - FFXM_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of x, obtained by remapping x and calling ffxParabolicSinHalf. +FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x) +{ + x = ffxFract(x * FFXM_BROADCAST_FLOAT16X2(0.5) + FFXM_BROADCAST_FLOAT16X2(0.75)); + x = x * FFXM_BROADCAST_FLOAT16X2(2.0) - FFXM_BROADCAST_FLOAT16X2(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of both sine and cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine and cosine for. +/// +/// @returns +/// A FfxFloat16x2 containing the approximate sine of x in its first component and the approximate cosine of x in its second. +FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) +{ + FfxFloat16 y = ffxFract(x * FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16(0.75)); + y = y * FFXM_BROADCAST_FLOAT16(2.0) - FFXM_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(FfxFloat16x2(x, y)); +} + +/// Conditional free logic AND operation using two FfxUInt16 values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two FfxUInt16x2 values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. 
+/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two FfxUInt16x3 values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two FfxUInt16x4 values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation using a FfxUInt16 value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// Only bit 0 is toggled (x ^ 1), so x is presumably expected to be 0 or 1. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) +{ + return x ^ FFXM_BROADCAST_UINT16(1); +} + +/// Conditional free logic NOT operation using a FfxUInt16x2 value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// Only bit 0 of each component is toggled (x ^ 1), so components are presumably expected to be 0 or 1. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) +{ + return x ^ FFXM_BROADCAST_UINT16X2(1); +} + +/// Conditional free logic NOT operation using a FfxUInt16x3 value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// Only bit 0 of each component is toggled (x ^ 1), so components are presumably expected to be 0 or 1. +/// +/// @returns +/// Result of the NOT operation. 
+/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) +{ + return x ^ FFXM_BROADCAST_UINT16X3(1); +} + +/// Conditional free logic NOT operation using a FfxUInt16x4 value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// Only bit 0 of each component is toggled (x ^ 1), so components are presumably expected to be 0 or 1. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) +{ + return x ^ FFXM_BROADCAST_UINT16X4(1); +} + +/// Conditional free logic OR operation using two FfxUInt16 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxUInt16x2 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxUInt16x3 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxUInt16x4 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). 
+/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Convert a FfxFloat16 value between 0.0 and 1.0 to a FfxUInt16. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. NOTE(review): the multiplier is FFXM_TO_FLOAT16(FFXM_TO_UINT16(1)); the FFXM_TO_* macros are defined outside this section, so confirm it evaluates to 1.0. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) +{ + return FFXM_TO_UINT16(x * FFXM_TO_FLOAT16(FFXM_TO_UINT16(1))); +} + +/// Convert a FfxFloat16x2 value with components between 0.0 and 1.0 to a FfxUInt16x2. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) +{ + return FFXM_TO_UINT16X2(x * FFXM_TO_FLOAT16X2(FfxUInt16x2(1, 1))); +} + +/// Convert a FfxFloat16x3 value with components between 0.0 and 1.0 to a FfxUInt16x3. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) +{ + return FFXM_TO_UINT16X3(x * FFXM_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); +} + +/// Convert a FfxFloat16x4 value with components between 0.0 and 1.0 to a FfxUInt16x4. +/// +/// @param [in] x The value to be converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) +{ + return FFXM_TO_UINT16X4(x * FFXM_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); +} + +/// Convert a FfxUInt16 value between 0 and 1 to a FfxFloat16. +/// +/// @param [in] x The value to be converted to a FfxFloat16. +/// +/// @returns +/// The converted FfxFloat16 value. NOTE(review): the multiplier is FFXM_TO_UINT16(FFXM_TO_FLOAT16(1.0)); the FFXM_TO_* macros are defined outside this section, so confirm it evaluates to 1. 
+/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) +{ + return FFXM_TO_FLOAT16(x * FFXM_TO_UINT16(FFXM_TO_FLOAT16(1.0))); +} + +/// Convert a FfxUInt16x2 value with components between 0 and 1 to a FfxFloat16x2. +/// +/// @param [in] x The value to be converted to a FfxFloat16x2. +/// +/// @returns +/// The converted FfxFloat16x2 value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) +{ + return FFXM_TO_FLOAT16X2(x * FFXM_TO_UINT16X2(FfxUInt16x2(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0)))); +} + +/// Convert a FfxUInt16x3 value with components between 0 and 1 to a FfxFloat16x3. +/// +/// @param [in] x The value to be converted to a FfxFloat16x3. +/// +/// @returns +/// The converted FfxFloat16x3 value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) +{ + return FFXM_TO_FLOAT16X3(x * FFXM_TO_UINT16X3(FfxUInt16x3(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0)))); +} + +/// Convert a FfxUInt16x4 value with components between 0 and 1 to a FfxFloat16x4. +/// +/// @param [in] x The value to be converted to a FfxFloat16x4. +/// +/// @returns +/// The converted FfxFloat16x4 value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) +{ + return FFXM_TO_FLOAT16X4(x * FFXM_TO_UINT16X4(FfxUInt16x4(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0)))); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). 
+/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation, computed as min(x, y). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as (-x) * y + 1.0. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return (-x) * y + FFXM_BROADCAST_FLOAT16(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. 
+/// +/// @returns +/// Result of the AND NOT operation, computed as (-x) * y + 1.0. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return (-x) * y + FFXM_BROADCAST_FLOAT16X2(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as (-x) * y + 1.0. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return (-x) * y + FFXM_BROADCAST_FLOAT16X3(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation, computed as (-x) * y + 1.0. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return (-x) * y + FFXM_BROADCAST_FLOAT16X4(1.0); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. 
+/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// an OR operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation, computed as ffxSaturate(x * y + z). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison, computed as ffxSaturate(x * FFXM_POSITIVE_INFINITY_HALF). +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. 
+/// +/// @returns +/// Result of the greater than zero comparison, computed as ffxSaturate(x * FFXM_POSITIVE_INFINITY_HALF). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X2(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison, computed as ffxSaturate(x * FFXM_POSITIVE_INFINITY_HALF). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X3(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison, computed as ffxSaturate(x * FFXM_POSITIVE_INFINITY_HALF). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X4(FFXM_POSITIVE_INFINITY_HALF)); +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) +{ + return FFXM_BROADCAST_FLOAT16(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) +{ + return FFXM_BROADCAST_FLOAT16X2(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. 
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) +{ + return FFXM_BROADCAST_FLOAT16X3(1.0) - x; +} + +/// Conditional free logic NOT operation using a half-precision value. +/// +/// @param [in] x The value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation, computed as 1.0 - x. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) +{ + return FFXM_BROADCAST_FLOAT16X4(1.0) - x; +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation, computed as max(x, y). 
+/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value, computed as x * y + ((-x) * z + z); no comparison is performed, so x is presumably expected to be 0.0 or 1.0. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + FfxFloat16 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value, computed as x * y + ((-x) * z + z); no comparison is performed, so x is presumably expected to be 0.0 or 1.0. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + FfxFloat16x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. +/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value, computed as x * y + ((-x) * z + z); no comparison is performed, so x is presumably expected to be 0.0 or 1.0. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + FfxFloat16x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision values if the first parameter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparison is greater than zero. 
+/// @param [in] z The value to return if the comparison is less than or equal to zero. +/// +/// @returns +/// The selected value, computed as x * y + (1 - x) * z. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + FfxFloat16x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test (1.0 or 0.0). +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test (1.0 or 0.0). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X2(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test (1.0 or 0.0). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X3(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign test (1.0 or 0.0). +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFXM_BROADCAST_FLOAT16X4(FFXM_NEGATIVE_INFINITY_HALF)); +} + +/// Convert a linear color value to Rec.709 encoding. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) 
For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return max(min(j.x, c * j.y), min(max(j.x, c * j.y), pow(c, j.z) * k.x + k.y)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Convert a linear color value to Rec.709 encoding. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return max(min(j.xx, c * j.yy), min(max(j.xx, c * j.yy), pow(c, j.zz) * k.xx + k.yy)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Convert a linear color value to Rec.709 encoding. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return max(min(j.xxx, c * j.yyy), min(max(j.xxx, c * j.yyy), pow(c, j.zzz) * k.xxx + k.yyy)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Compute a gamma value from a linear value.
+/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space, computed as pow(c, rcpX). +/// +/// @ingroup GPUCore +FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) +{ + return pow(c, FFXM_BROADCAST_FLOAT16(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space, computed as pow(c, rcpX). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of the power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space, computed as pow(c, rcpX). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X3(rcpX)); +} + +/// Compute an sRGB value from a linear value. +/// +/// @param [in] c The value to convert to sRGB from linear. +/// +/// @returns +/// A value in sRGB space.
+/// +/// @ingroup GPUCore +FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return max(min(j.x, c * j.y), min(max(j.x, c * j.y), pow(c, j.z) * k.x + k.y)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Compute an sRGB value from a linear value. +/// +/// @param [in] c The value to convert to sRGB from linear. +/// +/// @returns +/// A value in sRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return max(min(j.xx, c * j.yy), min(max(j.xx, c * j.yy), pow(c, j.zz) * k.xx + k.yy)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Compute an sRGB value from a linear value. +/// +/// @param [in] c The value to convert to sRGB from linear. +/// +/// @returns +/// A value in sRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return max(min(j.xxx, c * j.yyy), min(max(j.xxx, c * j.yyy), pow(c, j.zzz) * k.xxx + k.yyy)); // median of crossover value, linear segment and curved segment; unlike clamp(), well defined when the lower bound exceeds the upper bound +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) +{ + return sqrt(c); +} + +/// Compute the cube root of a value.
+/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) +{ + return pow(c, FFXM_BROADCAST_FLOAT16(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X2(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// The cube root of the input value, computed as pow(c, 1/3). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X3(1.0 / 3.0)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space.
+/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, computed as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) +{ + return pow(c, FFXM_BROADCAST_FLOAT16(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, computed as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X2(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The gamma-space value to convert to linear. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space, computed as pow(c, x). +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) +{ + return pow(c, FFXM_BROADCAST_FLOAT16X3(x)); +} + +/// Compute a linear value from a value in sRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The sRGB-space value to convert to linear.
+/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in sRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The sRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in sRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The sRGB-space value to convert to linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates.
+/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing the 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} + +#endif // FFXM_HALF diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta new file mode 100644 index 0000000..7b4903e --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: b5e484d04abc3c84788c93d9a2e50b7f +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude 
PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h new file mode 100644 index 0000000..9696c28 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h @@ -0,0 +1,1643 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/// @defgroup HLSLCore HLSL Core +/// HLSL core defines and functions +/// +/// @ingroup FfxHLSL + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_GROUPSHARED groupshared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_GROUP_MEMORY_BARRIER() GroupMemoryBarrierWithGroupSync() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_ATOMIC_ADD(x, y) InterlockedAdd(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup HLSLCore +#define FFXM_STATIC static + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_UNROLL [unroll] + +/// A define for abstracting a 'greater than' comparison operator between two types. The expansion is fully parenthesized so operator arguments and surrounding expressions cannot regroup it. +/// +/// @ingroup HLSLCore +#define FFXM_GREATER_THAN(x, y) ((x) > (y)) + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. The expansion is fully parenthesized. +/// +/// @ingroup HLSLCore +#define FFXM_GREATER_THAN_EQUAL(x, y) ((x) >= (y)) + +/// A define for abstracting a 'less than' comparison operator between two types. The expansion is fully parenthesized. +/// +/// @ingroup HLSLCore +#define FFXM_LESS_THAN(x, y) ((x) < (y)) + +/// A define for abstracting a 'less than or equal' comparison operator between two types. The expansion is fully parenthesized. +/// +/// @ingroup HLSLCore +#define FFXM_LESS_THAN_EQUAL(x, y) ((x) <= (y)) + +/// A define for abstracting an 'equal' comparison operator between two types.
+/// +/// @ingroup HLSLCore +#define FFXM_EQUAL(x, y) ((x) == (y)) + +/// A define for abstracting a 'not equal' comparison operator between two types. The expansion is fully parenthesized. +/// +/// @ingroup HLSLCore +#define FFXM_NOT_EQUAL(x, y) ((x) != (y)) + +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_MATRIX_MULTIPLY(a, b) mul(a, b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_TRANSFORM_VECTOR(a, b) mul(a, b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFXM_MODULO(a, b) (fmod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_FLOAT32X2(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_FLOAT32X3(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_FLOAT32X4(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_UINT32X2(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_UINT32X3(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_UINT32X4(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector.
+/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. Expands to the same scalar FfxInt32 cast as the 1-dimensional variant; presumably the use site widens it via implicit scalar-to-vector promotion (TODO confirm). +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_INT32X2(x) FfxInt32(x) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_INT32X3(x) FfxInt32(x) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_INT32X4(x) FfxInt32(x) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_FLOAT16(a) FFXM_MIN16_F(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. Expands to the same FFXM_MIN16_F(a) as the 1-dimensional variant. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_FLOAT16X2(a) FFXM_MIN16_F(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_FLOAT16X3(a) FFXM_MIN16_F(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_FLOAT16X4(a) FFXM_MIN16_F(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_UINT16(a) FFXM_MIN16_U(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. Expands to the same FFXM_MIN16_U(a) as the 1-dimensional variant. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_UINT16X2(a) FFXM_MIN16_U(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_UINT16X3(a) FFXM_MIN16_U(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
+/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_UINT16X4(a) FFXM_MIN16_U(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_INT16(a) FFXM_MIN16_I(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_INT16X2(a) FFXM_MIN16_I(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_INT16X3(a) FFXM_MIN16_I(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFXM_BROADCAST_MIN_INT16X4(a) FFXM_MIN16_I(a) + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into its nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup HLSLCore +FfxUInt32 packHalf2x16(FfxFloat32x2 value) +{ + return f32tof16(value.x) | (f32tof16(value.y) << 16); +} + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component.
+/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast.
+/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) // Returns src shifted right by off and masked to its low `bits` bits. +{ + FfxUInt32 mask = (1u << bits) - 1; // low `bits` bits set; assumes bits < 32 (TODO confirm with callers) + return (src >> off) & mask; +} + +FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) // Takes the bits selected by mask from ins and all other bits from src. +{ + return (ins & mask) | (src & (~mask)); +} + +FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) // Takes the low `bits` bits from ins and all other bits from src. +{ + FfxUInt32 mask = (1u << bits) - 1; // low `bits` bits set; assumes bits < 32 (TODO confirm with callers) + return (ins & mask) | (src & (~mask)); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer.
+/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return asfloat(x); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. 
Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interpolation between two values. +/// +/// Implemented by calling the HLSL lerp intrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return lerp(x, y, t); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. 
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return saturate(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function.
+/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return x - floor(x); +} + +/// Compute the fractional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_FRACT_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return x - floor(x); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z.
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calculation. +/// @param [in] z The third value to include in the max calculation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z.
+/// +/// @ingroup HLSLCore +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values.
+/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values.
+/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calculation. +/// @param [in] z The third value to include in the median calculation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation.
+/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + + +/// Shifts a right by b after converting both operands to signed integers, then converts the result back to unsigned. +FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Packs v into one 32-bit word as two half-precision values: v.x in bits 0-15 and v.y in bits 16-31. +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +/// Unpacks two half-precision values from a: bits 0-15 become .x and bits 16-31 become .y. +FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ + return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); +} + +//============================================================================================================================== +// HLSL HALF +//============================================================================================================================== +//============================================================================================================================== +// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ +// ffxUint32ToFloat16x2: splits x into its low and high 16-bit words, converts each with f16tof32() and narrows the pair to FFXM_MIN16_F2. +FFXM_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) +{ + FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); + return FFXM_MIN16_F2(t); +} +// ffxUint32x2ToFloat16x4: unpacks x.x into the first two components and x.y into the last two. +FFXM_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FFXM_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); +} +// ffxUint32ToUint16x2: splits x into its low 16 bits (.x) and high 16 bits (.y). +FFXM_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) +{ + FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); + return FFXM_MIN16_U2(t); +} +// ffxUint32x2ToUint16x4: unpacks x.x into the first two components and x.y into the last two. +FFXM_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) +{ + return FFXM_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = sign(v); + FfxFloat32 s2 = s*s; + // s2 is 1 for v != 0 and 0 for v == 0. (s2 - 1.0) is evaluated first because it is exactly 0 or -1, so a non-zero v reaches the divide unrounded instead of being rounded through v + 1. + return s2/(v + (s2 - 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = sign(v); + FfxFloat32x2 s2 = s*s; + // (s2 - 1) is exactly 0 or -1 per component; adding it to v last keeps small v from being rounded through v + 1. + return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = sign(v); + FfxFloat32x3 s2 = s*s; + // (s2 - 1) is exactly 0 or -1 per component; adding it to v last keeps small v from being rounded through v + 1. + return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = sign(v); + FfxFloat32x4 s2 = s*s; + // (s2 - 1) is exactly 0 or -1 per component; adding it to v last keeps small v from being rounded through v + 1. + return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); +} + +#define FFXM_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#if FFXM_HALF + +#define FFXM_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFXM_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) +#define FFXM_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) + +// ffxPackF16: packs v into one 32-bit word as two half values, v.x in bits 0-15 and v.y in bits 16-31. +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + FfxUInt32x2 p = FfxUInt32x2(f32tof16(FfxFloat32x2(v).x), f32tof16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +// ffxUnpackF16: unpacks two half values from a, bits 0-15 into .x and bits 16-31 into .y. +FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ + return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); +} + +//------------------------------------------------------------------------------------------------------------------------------ +// Pack min16 vectors into 32-bit words: the first component of each pair goes to the low 16 bits, the second is shifted into the high 16 bits. +FfxUInt32 FFXM_MIN16_F2ToUint32(FFXM_MIN16_F2 x) +{ + return f32tof16(x.x) + (f32tof16(x.y) << 16); +} +FfxUInt32x2 FFXM_MIN16_F4ToUint32x2(FFXM_MIN16_F4 x) +{ + return FfxUInt32x2(FFXM_MIN16_F2ToUint32(x.xy), FFXM_MIN16_F2ToUint32(x.zw)); +} +FfxUInt32 FFXM_MIN16_U2ToUint32(FFXM_MIN16_U2 x) +{ + return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); +} +FfxUInt32x2 FFXM_MIN16_U4ToUint32x2(FFXM_MIN16_U4 x) +{ + return FfxUInt32x2(FFXM_MIN16_U2ToUint32(x.xy), FFXM_MIN16_U2ToUint32(x.zw)); +} +#define FFXM_FLOAT16X2_TO_UINT32(x) FFXM_MIN16_F2ToUint32(FFXM_MIN16_F2(x)) +#define FFXM_FLOAT16X4_TO_UINT32X2(x) FFXM_MIN16_F4ToUint32x2(FFXM_MIN16_F4(x)) +#define FFXM_UINT16X2_TO_UINT32(x) FFXM_MIN16_U2ToUint32(FFXM_MIN16_U2(x)) +#define FFXM_UINT16X4_TO_UINT32X2(x) FFXM_MIN16_U4ToUint32x2(FFXM_MIN16_U4(x)) + +// FFXM_TO_UINT16*: asuint16() when FFXM_HLSL_6_2 is set and FFXM_NO_16_BIT_CAST is not defined, otherwise f32tof16() applied per component. +#if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST) +#define FFXM_TO_UINT16(x) asuint16(x) +#define FFXM_TO_UINT16X2(x) asuint16(x) +#define FFXM_TO_UINT16X3(x) asuint16(x) +#define FFXM_TO_UINT16X4(x) asuint16(x) +#else +#define FFXM_TO_UINT16(a) FFXM_MIN16_U(f32tof16(FfxFloat32(a))) +#define
FFXM_TO_UINT16X2(a) FFXM_MIN16_U2(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y)) +#define FFXM_TO_UINT16X3(a) FFXM_MIN16_U3(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y), FFXM_TO_UINT16((a).z)) +#define FFXM_TO_UINT16X4(a) FFXM_MIN16_U4(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y), FFXM_TO_UINT16((a).z), FFXM_TO_UINT16((a).w)) +#endif // #if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST) + +// FFXM_TO_FLOAT16*: asfloat16() when FFXM_HLSL_6_2 is set and FFXM_NO_16_BIT_CAST is not defined, otherwise f16tof32() applied per component. +#if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST) +#define FFXM_TO_FLOAT16(x) asfloat16(x) +#define FFXM_TO_FLOAT16X2(x) asfloat16(x) +#define FFXM_TO_FLOAT16X3(x) asfloat16(x) +#define FFXM_TO_FLOAT16X4(x) asfloat16(x) +#else +#define FFXM_TO_FLOAT16(a) FFXM_MIN16_F(f16tof32(FfxUInt32(a))) +#define FFXM_TO_FLOAT16X2(a) FFXM_MIN16_F2(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y)) +#define FFXM_TO_FLOAT16X3(a) FFXM_MIN16_F3(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y), FFXM_TO_FLOAT16((a).z)) +#define FFXM_TO_FLOAT16X4(a) FFXM_MIN16_F4(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y), FFXM_TO_FLOAT16((a).z), FFXM_TO_FLOAT16((a).w)) +#endif // #if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST) + +//============================================================================================================================== +// FFXM_BROADCAST_*: every width (scalar, X2, X3, X4) expands to the scalar FFXM_MIN16_F / FFXM_MIN16_I / FFXM_MIN16_U conversion of a. +#define FFXM_BROADCAST_FLOAT16(a) FFXM_MIN16_F(a) +#define FFXM_BROADCAST_FLOAT16X2(a) FFXM_MIN16_F(a) +#define FFXM_BROADCAST_FLOAT16X3(a) FFXM_MIN16_F(a) +#define FFXM_BROADCAST_FLOAT16X4(a) FFXM_MIN16_F(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFXM_BROADCAST_INT16(a) FFXM_MIN16_I(a) +#define FFXM_BROADCAST_INT16X2(a) FFXM_MIN16_I(a) +#define FFXM_BROADCAST_INT16X3(a) FFXM_MIN16_I(a) +#define FFXM_BROADCAST_INT16X4(a) FFXM_MIN16_I(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFXM_BROADCAST_UINT16(a) FFXM_MIN16_U(a) +#define FFXM_BROADCAST_UINT16X2(a)
FFXM_MIN16_U(a) +#define FFXM_BROADCAST_UINT16X3(a) FFXM_MIN16_U(a) +#define FFXM_BROADCAST_UINT16X4(a) FFXM_MIN16_U(a) + +//============================================================================================================================== +// ffxAbsHalf: converts a to the signed FFXM_MIN16_I* type, takes abs(), and converts the result back to the unsigned type. +FFXM_MIN16_U ffxAbsHalf(FFXM_MIN16_U a) +{ + return FFXM_MIN16_U(abs(FFXM_MIN16_I(a))); +} +FFXM_MIN16_U2 ffxAbsHalf(FFXM_MIN16_U2 a) +{ + return FFXM_MIN16_U2(abs(FFXM_MIN16_I2(a))); +} +FFXM_MIN16_U3 ffxAbsHalf(FFXM_MIN16_U3 a) +{ + return FFXM_MIN16_U3(abs(FFXM_MIN16_I3(a))); +} +FFXM_MIN16_U4 ffxAbsHalf(FFXM_MIN16_U4 a) +{ + return FFXM_MIN16_U4(abs(FFXM_MIN16_I4(a))); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxClampHalf: limits x to the range [n, m], computed as max(n, min(x, m)). +FFXM_MIN16_F ffxClampHalf(FFXM_MIN16_F x, FFXM_MIN16_F n, FFXM_MIN16_F m) +{ + return max(n, min(x, m)); +} +FFXM_MIN16_F2 ffxClampHalf(FFXM_MIN16_F2 x, FFXM_MIN16_F2 n, FFXM_MIN16_F2 m) +{ + return max(n, min(x, m)); +} +FFXM_MIN16_F3 ffxClampHalf(FFXM_MIN16_F3 x, FFXM_MIN16_F3 n, FFXM_MIN16_F3 m) +{ + return max(n, min(x, m)); +} +FFXM_MIN16_F4 ffxClampHalf(FFXM_MIN16_F4 x, FFXM_MIN16_F4 n, FFXM_MIN16_F4 m) +{ + return max(n, min(x, m)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// V_FRACT_F16 (note DX frac() is different).
+FFXM_MIN16_F ffxFract(FFXM_MIN16_F x) +{ + return x - floor(x); +} +FFXM_MIN16_F2 ffxFract(FFXM_MIN16_F2 x) +{ + return x - floor(x); +} +FFXM_MIN16_F3 ffxFract(FFXM_MIN16_F3 x) +{ + return x - floor(x); +} +FFXM_MIN16_F4 ffxFract(FFXM_MIN16_F4 x) +{ + return x - floor(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxLerp (half): lerp(x, y, a); the weight a is either one scalar shared by all components or a per-component vector. +FFXM_MIN16_F ffxLerp(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F2 ffxLerp(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F2 ffxLerp(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F3 ffxLerp(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F3 ffxLerp(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F4 ffxLerp(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F a) +{ + return lerp(x, y, a); +} +FFXM_MIN16_F4 ffxLerp(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 a) +{ + return lerp(x, y, a); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxMax3Half: maximum of x, y and z, computed as max(x, max(y, z)). +FFXM_MIN16_F ffxMax3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z) +{ + return max(x, max(y, z)); +} +FFXM_MIN16_F2 ffxMax3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 z) +{ + return max(x, max(y, z)); +} +FFXM_MIN16_F3 ffxMax3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z) +{ + return max(x, max(y, z)); +} +FFXM_MIN16_F4 ffxMax3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxMin3Half: minimum of x, y and z, computed as min(x, min(y, z)). +FFXM_MIN16_F ffxMin3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z) +{ + return min(x, min(y, z)); +} +FFXM_MIN16_F2 ffxMin3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2
y, FFXM_MIN16_F2 z) +{ + return min(x, min(y, z)); +} +FFXM_MIN16_F3 ffxMin3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z) +{ + return min(x, min(y, z)); +} +FFXM_MIN16_F4 ffxMin3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxMed3Half (float types): median of x, y and z, computed as max(min(x, y), min(max(x, y), z)). +FFXM_MIN16_F ffxMed3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_F2 ffxMed3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_F3 ffxMed3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_F4 ffxMed3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxMed3Half (integer types): the same median formula for the FFXM_MIN16_I* types. +FFXM_MIN16_I ffxMed3Half(FFXM_MIN16_I x, FFXM_MIN16_I y, FFXM_MIN16_I z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_I2 ffxMed3Half(FFXM_MIN16_I2 x, FFXM_MIN16_I2 y, FFXM_MIN16_I2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_I3 ffxMed3Half(FFXM_MIN16_I3 x, FFXM_MIN16_I3 y, FFXM_MIN16_I3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFXM_MIN16_I4 ffxMed3Half(FFXM_MIN16_I4 x, FFXM_MIN16_I4 y, FFXM_MIN16_I4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxReciprocalHalf: forwards to the rcp() intrinsic. +FFXM_MIN16_F ffxReciprocalHalf(FFXM_MIN16_F x) +{ + return rcp(x); +} +FFXM_MIN16_F2 ffxReciprocalHalf(FFXM_MIN16_F2 x) +{ + return rcp(x); +} +FFXM_MIN16_F3 ffxReciprocalHalf(FFXM_MIN16_F3 x) +{ + return rcp(x); +} +FFXM_MIN16_F4 ffxReciprocalHalf(FFXM_MIN16_F4 x) +{ + return rcp(x); +}
+//------------------------------------------------------------------------------------------------------------------------------ +// ffxReciprocalSquareRootHalf: forwards to the rsqrt() intrinsic. +FFXM_MIN16_F ffxReciprocalSquareRootHalf(FFXM_MIN16_F x) +{ + return rsqrt(x); +} +FFXM_MIN16_F2 ffxReciprocalSquareRootHalf(FFXM_MIN16_F2 x) +{ + return rsqrt(x); +} +FFXM_MIN16_F3 ffxReciprocalSquareRootHalf(FFXM_MIN16_F3 x) +{ + return rsqrt(x); +} +FFXM_MIN16_F4 ffxReciprocalSquareRootHalf(FFXM_MIN16_F4 x) +{ + return rsqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxSaturate (half): forwards to the saturate() intrinsic, which clamps to the [0..1] range. +FFXM_MIN16_F ffxSaturate(FFXM_MIN16_F x) +{ + return saturate(x); +} +FFXM_MIN16_F2 ffxSaturate(FFXM_MIN16_F2 x) +{ + return saturate(x); +} +FFXM_MIN16_F3 ffxSaturate(FFXM_MIN16_F3 x) +{ + return saturate(x); +} +FFXM_MIN16_F4 ffxSaturate(FFXM_MIN16_F4 x) +{ + return saturate(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +// ffxBitShiftRightHalf: converts a and b to the signed FFXM_MIN16_I* types, shifts a right by b, and converts the result back to unsigned. +FFXM_MIN16_U ffxBitShiftRightHalf(FFXM_MIN16_U a, FFXM_MIN16_U b) +{ + return FFXM_MIN16_U(FFXM_MIN16_I(a) >> FFXM_MIN16_I(b)); +} +FFXM_MIN16_U2 ffxBitShiftRightHalf(FFXM_MIN16_U2 a, FFXM_MIN16_U2 b) +{ + return FFXM_MIN16_U2(FFXM_MIN16_I2(a) >> FFXM_MIN16_I2(b)); +} +FFXM_MIN16_U3 ffxBitShiftRightHalf(FFXM_MIN16_U3 a, FFXM_MIN16_U3 b) +{ + return FFXM_MIN16_U3(FFXM_MIN16_I3(a) >> FFXM_MIN16_I3(b)); +} +FFXM_MIN16_U4 ffxBitShiftRightHalf(FFXM_MIN16_U4 a, FFXM_MIN16_U4 b) +{ + return FFXM_MIN16_U4(FFXM_MIN16_I4(a) >> FFXM_MIN16_I4(b)); +} +#endif // FFXM_HALF + +//============================================================================================================================== +// HLSL WAVE +//============================================================================================================================== +#if defined(FFXM_WAVE) +// Where 'x' must be a compile time literal.
+/* AWaveXorF1..F4: return the value of v held by the lane whose index is this lane's index XOR x (WaveReadLaneAt on WaveGetLaneIndex() ^ x). */ FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +/* Unsigned variants of the same lane exchange. NOTE(review): the 2, 3 and 4 component versions below are all named AWaveXorU1 and are told apart only by overload resolution, unlike the F1..F4 naming above; check callers before renaming. */ FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +/* The functions from here to WaveAllTrue are one-line wrappers; each forwards its arguments unchanged to the single HLSL wave intrinsic named in its body. */ FfxBoolean AWaveIsFirstLane() +{ + return WaveIsFirstLane(); +} +FfxUInt32 AWaveLaneIndex() +{ + return WaveGetLaneIndex(); +} +FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, x); +} +FfxUInt32 AWavePrefixCountBits(FfxBoolean v) +{ + return WavePrefixCountBits(v); +} +FfxUInt32 AWaveActiveCountBits(FfxBoolean v) +{ + return WaveActiveCountBits(v); +} +FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32 WaveOr(FfxUInt32 a) +{ + return WaveActiveBitOr(a); +} +FfxFloat32 WaveMin(FfxFloat32 a) +{ + return WaveActiveMin(a); +} +FfxFloat32 WaveMax(FfxFloat32 a) +{ + return WaveActiveMax(a); +} +FfxUInt32 WaveLaneCount() +{ + return WaveGetLaneCount(); +} +FfxBoolean WaveAllTrue(FfxBoolean v) +{ + return WaveActiveAllTrue(v); +} +/* QuadReadX and QuadReadY: forward to QuadReadAcrossX and QuadReadAcrossY, with scalar and 2-component overloads. */ FfxFloat32 QuadReadX(FfxFloat32 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32x2 QuadReadX(FfxFloat32x2 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32 QuadReadY(FfxFloat32 v) +{ + return QuadReadAcrossY(v); +} +FfxFloat32x2 QuadReadY(FfxFloat32x2 v) +{ + return QuadReadAcrossY(v); +} + +#if
FFXM_HALF +/* ffxWaveXorFloat16x2: converts v with FFXM_FLOAT16X2_TO_UINT32, reads that value from the lane at (this lane's index XOR x), then converts the result back with FFXM_UINT32_TO_FLOAT16X2. The conversion macros are defined elsewhere; presumably they pack two 16-bit values into one 32-bit word (confirm). */ FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFXM_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFXM_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +/* ffxWaveXorFloat16x4: same exchange, going through the UINT32X2 conversion macros. */ FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFXM_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFXM_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +/* ffxWaveXorUint16x2: unsigned 16-bit counterpart of ffxWaveXorFloat16x2. */ FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFXM_UINT32_TO_UINT16X2(WaveReadLaneAt(FFXM_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +/* ffxWaveXorUint16x4: unsigned 16-bit counterpart of ffxWaveXorFloat16x4. */ FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return FFXM_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFXM_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +#endif // FFXM_HALF +#endif // #if defined(FFXM_WAVE) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta new file mode 100644 index 0000000..9f41d30 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 3414d977001cdfc47846380911de9f05 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: +
enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h new file mode 100644 index 0000000..368cb2c --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h @@ -0,0 +1,50 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* opA*F3 helpers: each one overwrites its by-value parameter d with the computed result and returns it. The incoming value of d is never read, and because d is passed by value the caller only sees the result through the return value. */ +FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a + ffxBroadcast3(b); + return d; +} +/* opACpyF3: returns a copy of a. */ +FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = a; + return d; +} +/* opAMulF3: returns the product a * b of two 3-vectors. */ +FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d = a * b; + return d; +} +/* opAMulOneF3: returns a multiplied by ffxBroadcast3(b), where b is a scalar. */ +FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a * ffxBroadcast3(b); + return d; +} +/* opARcpF3: returns rcp(a). */ +FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = rcp(a); + return d; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta new file mode 100644 index 0000000..6858a54 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: c599687271fc4f444a2858745fc7f0c5 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone:
Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl new file mode 100644 index 0000000..581eecf --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -0,0 +1,103 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* Accumulate pass, fragment-shader variant. The FSR2_BIND_* macros below assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably so that fsr2/ffxm_fsr2_callbacks_hlsl.h can pick them up (confirm against that header). */ +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 +#else +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#endif +#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR2_BIND_SRV_LOCK_STATUS 4 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_LANCZOS_LUT 6 +#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 8 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 9 +#define FSR2_BIND_SRV_LUMA_HISTORY 10 +#define FSR2_BIND_SRV_TEMPORAL_REACTIVE 11 + +#define FSR2_BIND_UAV_NEW_LOCKS 12 + +#define FSR2_BIND_CB_FSR2 0 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_sample.h" +#include "fsr2/ffxm_fsr2_upsample.h" +#include "fsr2/ffxm_fsr2_postprocess_lock_status.h" +#include "fsr2/ffxm_fsr2_reproject.h" +#include "fsr2/ffxm_fsr2_accumulate.h" +/* NOTE(review): VertexOut is not referenced by the entry point below, which takes SV_POSITION directly. */ +struct VertexOut +{ + float4 position : SV_POSITION; +}; +/* Render-target layout of the pass. The members and their SV_TARGET slots depend on FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE; fColor on SV_TARGET3 exists only when FFXM_FSR2_OPTION_APPLY_SHARPENING is 0. Note that fLockStatus sits on SV_TARGET1 in the first layout and on SV_TARGET2 in the second. */ +struct AccumulateOutputsFS +{ +#if !FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE + FfxFloat32x4 fColorAndWeight : SV_TARGET0; + FfxFloat32x2 fLockStatus : SV_TARGET1; + FfxFloat32x4 fLumaHistory : SV_TARGET2; +#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 + FfxFloat32x3 fColor : SV_TARGET3; +#endif +#else // FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE + FfxFloat32x3 fUpscaledColor : SV_TARGET0; + FfxFloat32 fTemporalReactive : SV_TARGET1; + FfxFloat32x2 fLockStatus : SV_TARGET2; +#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 + FfxFloat32x3 fColor : SV_TARGET3; +#endif +#endif +}; +/* Entry point: converts SV_POSITION.xy to an unsigned integer pixel coordinate, runs Accumulate() on it, and copies the result fields selected by the compile-time options into the render-target struct. */ +AccumulateOutputsFS main(float4 SvPosition : SV_POSITION) +{ + uint2 uPixelCoord = uint2(SvPosition.xy); + AccumulateOutputs result = Accumulate(uPixelCoord); +
AccumulateOutputsFS output = (AccumulateOutputsFS)0; /* start from an all-zero struct, then fill the members that exist in this configuration */ +#if !FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE + output.fColorAndWeight = result.fColorAndWeight; + output.fLumaHistory = result.fLumaHistory; +#else + output.fUpscaledColor = result.fUpscaledColor; + output.fTemporalReactive = result.fTemporalReactive; +#endif + output.fLockStatus = result.fLockStatus; +#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 + output.fColor = result.fColor; +#endif + return output; +} \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta new file mode 100644 index 0000000..7d82e1e --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 63de8005a89afab4298bbc1e2edf2a01 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl new file mode 100644 index 0000000..1f1472f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -0,0 +1,83 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* Auto-generated reactive mask pass, fragment-shader variant. The FSR2_BIND_* macros assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably for fsr2/ffxm_fsr2_callbacks_hlsl.h (confirm against that header). */ +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_REACTIVE 1 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +/* Single render target receiving the reactive value computed by main. */ +struct GenReactiveMaskOutputs +{ + FfxFloat32 fReactiveMask : SV_TARGET0; +}; +/* NOTE(review): VertexOut is not referenced by the entry point below, which takes SV_POSITION directly. */ +struct VertexOut +{ + float4 position : SV_POSITION; +}; +/* Entry point: loads the opaque-only colour and the input colour at the pixel, optionally applies Tonemap and InverseTonemap, and derives a reactive value from their absolute difference. All options are bit tests on GenReactiveFlags(). */ +GenReactiveMaskOutputs main(float4 SvPosition : SV_POSITION) +{ + uint2 uPixelCoord = uint2(SvPosition.xy); +/* The opaque-only load takes the coordinate converted to FFXM_MIN16_I2, while the colour load takes the uint2 directly. */ + float3 ColorPreAlpha = LoadOpaqueOnly( FFXM_MIN16_I2(uPixelCoord) ).rgb; + float3 ColorPostAlpha = LoadInputColor(uPixelCoord).rgb; + + if (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) + { + ColorPreAlpha = Tonemap(ColorPreAlpha); + ColorPostAlpha = Tonemap(ColorPostAlpha); + } + + if (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) + { + ColorPreAlpha = InverseTonemap(ColorPreAlpha); + ColorPostAlpha = InverseTonemap(ColorPostAlpha); + } + + float out_reactive_value = 0.f; + float3 delta = abs(ColorPostAlpha - ColorPreAlpha); +/* Reduce delta to a scalar: the largest component when USE_COMPONENTS_MAX is set, otherwise its vector length; then multiply by GenReactiveScale(). */ + out_reactive_value = (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= GenReactiveScale(); +/* With APPLY_THRESHOLD set, the value becomes binary: 0 below GenReactiveThreshold(), GenReactiveBinaryValue() otherwise. */ + out_reactive_value = (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < GenReactiveThreshold() ? 
0 : GenReactiveBinaryValue()) : out_reactive_value; + + GenReactiveMaskOutputs results = (GenReactiveMaskOutputs)0; + results.fReactiveMask = out_reactive_value; + + return results; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta new file mode 100644 index 0000000..04141bb --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 59f96f119bfba924198951bea4194ecd +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl new file mode 100644 index 0000000..3e73211 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl @@ -0,0 +1,66 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* Compute-luminance-pyramid pass, compute-shader variant. The FSR2_BIND_* macros assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably for fsr2/ffxm_fsr2_callbacks_hlsl.h (confirm against that header). */ +#define FSR2_BIND_SRV_INPUT_COLOR 0 + +#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 +#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 +#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 4 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_SPD 1 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_compute_luminance_pyramid.h" +/* Thread-group size defaults to 256 x 1 x 1. Each value, and the whole numthreads attribute, can be overridden by defining the macro before this point. */ +#ifndef FFXM_FSR2_THREAD_GROUP_WIDTH +#define FFXM_FSR2_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_WIDTH +#ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT +#define FFXM_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFXM_FSR2_THREAD_GROUP_DEPTH +#define FFXM_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_DEPTH +#ifndef FFXM_FSR2_NUM_THREADS +#define FFXM_FSR2_NUM_THREADS [numthreads(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT, FFXM_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFXM_FSR2_NUM_THREADS +/* Entry point: passes the group id and the flattened in-group thread index straight to ComputeAutoExposure(). FFXM_PREFER_WAVE64 and FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT are attribute macros defined outside this file. */ +FFXM_PREFER_WAVE64 +FFXM_FSR2_NUM_THREADS +FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT +void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeAutoExposure(WorkGroupId, LocalThreadIndex); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..a066167 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid:
4670a9ebaa60c3143be978efc227163b +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl new file mode 100644 index 0000000..bd3723a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -0,0 +1,71 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* Depth-clip pass, fragment-shader variant. The FSR2_BIND_* macros assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably for fsr2/ffxm_fsr2_callbacks_hlsl.h (confirm against that header). */ +#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR2_BIND_SRV_DILATED_DEPTH 2 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_COLOR 7 +#define FSR2_BIND_SRV_INPUT_DEPTH 8 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 9 + +#define FSR2_BIND_CB_FSR2 0 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_sample.h" +#include "fsr2/ffxm_fsr2_depth_clip.h" +/* NOTE(review): VertexOut is not referenced by the entry point below, which takes SV_POSITION directly. */ +struct VertexOut +{ + float4 position : SV_POSITION; +}; + +/* Render-target layout: dilated reactive masks on SV_TARGET0 and the tonemapped value on SV_TARGET1. */ +struct DepthClipOutputsFS +{ + FfxFloat32x2 fDilatedReactiveMasks : SV_TARGET0; + FfxFloat32x4 fTonemapped : SV_TARGET1; +}; +/* Entry point: converts SV_POSITION.xy to an unsigned integer pixel coordinate, runs DepthClip() on it, and copies both result fields into a zero-initialised render-target struct. */ +DepthClipOutputsFS main(float4 SvPosition : SV_POSITION) +{ + uint2 uPixelCoord = uint2(SvPosition.xy); + DepthClipOutputs result = DepthClip(uPixelCoord); + DepthClipOutputsFS output = (DepthClipOutputsFS)0; + output.fDilatedReactiveMasks = result.fDilatedReactiveMasks; + output.fTonemapped = result.fTonemapped; + return output; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta new file mode 100644 index 0000000..c6497c2 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 2825c941cb2d43145b426c42ec6e7869 +ShaderIncludeImporter: + externalObjects: {} +
userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl new file mode 100644 index 0000000..f8166e1 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl @@ -0,0 +1,66 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* Lock pass, compute-shader variant. The FSR2_BIND_* macros assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably for fsr2/ffxm_fsr2_callbacks_hlsl.h (confirm against that header). */ +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 + +#define FSR2_BIND_UAV_NEW_LOCKS 1 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 + +#define FSR2_BIND_CB_FSR2 0 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_sample.h" +#include "fsr2/ffxm_fsr2_lock.h" +/* Thread-group size defaults to 8 x 8 x 1. Each value, and the whole numthreads attribute, can be overridden by defining the macro before this point. */ +#ifndef FFXM_FSR2_THREAD_GROUP_WIDTH +#define FFXM_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_WIDTH +#ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT +#define FFXM_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFXM_FSR2_THREAD_GROUP_DEPTH +#define FFXM_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFXM_FSR2_THREAD_GROUP_DEPTH +#ifndef FFXM_FSR2_NUM_THREADS +#define FFXM_FSR2_NUM_THREADS [numthreads(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT, FFXM_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFXM_FSR2_NUM_THREADS +/* Entry point: rebuilds the 2D dispatch-wide thread coordinate and hands it to ComputeLock(). FFXM_PREFER_WAVE64 and FFXM_FSR2_EMBED_ROOTSIG_CONTENT are attribute macros defined outside this file. */ +FFXM_PREFER_WAVE64 +FFXM_FSR2_NUM_THREADS +FFXM_FSR2_EMBED_ROOTSIG_CONTENT +void main(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; +/* The coordinate above is the group id scaled by the group size, offset by the thread id inside the group. */ + ComputeLock(uDispatchThreadId); +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta new file mode 100644 index 0000000..6867472 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 78914a065e6727e4d8255fb76b44d5da
+ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl new file mode 100644 index 0000000..ad49951 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl @@ -0,0 +1,60 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/* RCAS pass, fragment-shader variant. The FSR2_BIND_* macros assign slot numbers to this pass's resources; they are defined ahead of the includes, presumably for fsr2/ffxm_fsr2_callbacks_hlsl.h (confirm against that header). */ +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_RCAS_INPUT 1 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_RCAS 1 + +// Global mandatory defines +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_rcas.h" +/* NOTE(review): VertexOut is not referenced by the entry point below, which takes SV_POSITION directly. */ +struct VertexOut +{ + float4 position : SV_POSITION; +}; +/* Single render target receiving the colour returned by RCAS(). */ +struct RCASOutputsFS +{ + FfxFloat32x3 fUpscaledColor : SV_TARGET0; +}; +/* Entry point: converts SV_POSITION.xy to an unsigned integer pixel coordinate, runs RCAS() on it, and writes the resulting colour to SV_TARGET0. */ +RCASOutputsFS main(float4 SvPosition : SV_POSITION) +{ + uint2 uPixelCoord = uint2(SvPosition.xy); + RCASOutputs result = RCAS(uPixelCoord); + RCASOutputsFS output = (RCASOutputsFS)0; + output.fUpscaledColor = result.fUpscaledColor; + return output; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta new file mode 100644 index 0000000..ce5e743 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 16a306235fdc01044a347f0cb0a9b147 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl new file mode 100644 index 0000000..ef0a1b8 --- /dev/null +++
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -0,0 +1,69 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 + +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 4 + +#define FSR2_BIND_CB_FSR2 0 + +// Global mandatory defines (each defaults to 1 only when not already defined) +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" +#include "fsr2/ffxm_fsr2_sample.h" +#include "fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" + +struct VertexOut // NOTE(review): declared but not referenced by main() in this file. +{ + float4 position : SV_POSITION; +}; + +struct ReconstructPrevDepthOutputsFS // Fragment-stage outputs, one per render target (SV_TARGET0..2). +{ + FfxFloat32 fDepth : SV_TARGET0; + FfxFloat32x2 fMotionVector : SV_TARGET1; + FfxFloat32 fLuma : SV_TARGET2; +}; + + +ReconstructPrevDepthOutputsFS main(float4 SvPosition : SV_POSITION) // Fragment entry point: runs ReconstructAndDilate() for the pixel at SvPosition. +{ + uint2 uPixelCoord = uint2(SvPosition.xy); // Truncate the fragment position to an integer pixel coordinate. + ReconstructPrevDepthOutputs result = ReconstructAndDilate(uPixelCoord); // Presumably provided by the reconstruct_dilated_velocity_and_previous_depth header -- confirm. + ReconstructPrevDepthOutputsFS output = (ReconstructPrevDepthOutputsFS)0; // Zero-initialize, then copy each result field to its target. + output.fDepth = result.fDepth; + output.fMotionVector = result.fMotionVector; + output.fLuma = result.fLuma; + return output; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta new file mode 100644 index 0000000..7888885 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: bdcb34025b67be743a32494703775cc1 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl new file mode 100644 index 0000000..d657150 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl @@ -0,0 +1,50 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +// Global mandatory defines (each defaults to 1 only when not already defined) +#if !defined(FFXM_HALF) +#define FFXM_HALF 1 +#endif +#if !defined(FFXM_GPU) +#define FFXM_GPU 1 +#endif +#if !defined(FFXM_HLSL) +#define FFXM_HLSL 1 +#endif + +#define FSR2_BIND_CB_FSR2 0 + +#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" +#include "fsr2/ffxm_fsr2_common.h" + +struct VertexOut // Vertex-stage output: clip-space position only. +{ + float4 position : SV_POSITION; +}; + +VertexOut main(uint uVertexId : SV_VERTEXID) // Vertex entry point: derives the position from the vertex ID alone (no vertex inputs are read). +{ + VertexOut output; + float2 uv = float2(uVertexId & 1, uVertexId >> 1) * 2.0; // Vertex IDs 0, 1, 2 give uv = (0,0), (2,0), (0,2). + output.position = float4(uv * 2.0 - 1.0, 0.0, 1.0); // Those map to clip-space (-1,-1), (3,-1), (-1,3): a single triangle that contains the whole [-1,1] square. + + return output; +} \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta new file mode 100644 index 0000000..5d65f24 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: a9dfeac9728e7404f97655aac002e5eb +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta new file mode 100644 index 0000000..431aa13 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 825bf9eee2b16c7499e5cfb3c9721df0 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h new file mode 100644 index 0000000..ad5c865 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h @@ -0,0 +1,1251 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/// @defgroup FfxGPUFsr1 FidelityFX FSR1 +/// FidelityFX Super Resolution 1 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for EASU (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension. +/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension. 
+/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension. +/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension. +/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. +/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. +/// +/// @ingroup FfxGPUFsr1 +FFXM_STATIC void ffxFsrPopulateEasuConstants( + FFXM_PARAMETER_INOUT FfxUInt32x4 con0, + FFXM_PARAMETER_INOUT FfxUInt32x4 con1, + FFXM_PARAMETER_INOUT FfxUInt32x4 con2, + FFXM_PARAMETER_INOUT FfxUInt32x4 con3, + FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsY) +{ + // Output integer position to a pixel position in viewport: con0.xy is the scale, con0.zw the offset (floats stored through ffxAsUInt32). + con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX)); + con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY)); + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5)); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5)); + + // Viewport pixel position to normalized image space (con1.xy = 1 / input resource size). + // This is used to get upper-left of 'F' tap. + con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX)); + con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY)); + + // Centers of gather4, first offset from upper-left of 'F'. 
+ // +---+---+ + // | | | + // +--(0)--+ + // | b | c | + // +---F---+---+---+ + // | e | f | g | h | + // +--(1)--+--(2)--+ + // | i | j | k | l | + // +---+---+---+---+ + // | n | o | + // +--(3)--+ + // | | | + // +---+---+ + con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); // (0): +1 texel in x from 'F' + con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY)); // (0): -1 texel in y from 'F' + + // These are from (0) instead of 'F'. + con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX)); // (1): -1 texel in x from (0) + con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); // (1): +2 texels in y from (0) + con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); // (2): +1 texel in x from (0) + con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); // (2): +2 texels in y from (0) + con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX)); // (3): same x as (0) + con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY)); // (3): +4 texels in y from (0) + con3[2] = con3[3] = 0; // con3.zw carry no data; they are simply zeroed. +} + +/// Setup required constant values for EASU when the input image sits at an offset inside its resource (works on CPU or GPU). +/// +/// @param [out] con0 Scale (xy) and offset (zw) from output pixel to input pixel position; zw additionally includes the input offset. +/// @param [out] con1 Filled exactly as by ffxFsrPopulateEasuConstants. +/// @param [out] con2 Filled exactly as by ffxFsrPopulateEasuConstants. +/// @param [out] con3 Filled exactly as by ffxFsrPopulateEasuConstants. +/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension. +/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension. +/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension. +/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension. +/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension. +/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension. +/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution). +/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). 
+/// +/// @ingroup FfxGPUFsr1 +FFXM_STATIC void ffxFsrPopulateEasuConstantsOffset( + FFXM_PARAMETER_INOUT FfxUInt32x4 con0, + FFXM_PARAMETER_INOUT FfxUInt32x4 con1, + FFXM_PARAMETER_INOUT FfxUInt32x4 con2, + FFXM_PARAMETER_INOUT FfxUInt32x4 con3, + FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsY, + FFXM_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX, + FFXM_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY) +{ + ffxFsrPopulateEasuConstants( + con0, + con1, + con2, + con3, + inputViewportInPixelsX, + inputViewportInPixelsY, + inputSizeInPixelsX, + inputSizeInPixelsY, + outputSizeInPixelsX, + outputSizeInPixelsY); + + // Override con0.zw: same offset as computed above, plus the input offset in pixels. + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY); +} + +#if defined(FFXM_GPU) && defined(FFXM_FSR_EASU_FLOAT) +// Input callback prototypes; these must be implemented by the calling shader (one callback per color channel, each returning four values). +FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p); + +// Filtering for a given tap (scalar version: one tap per call). +void fsrEasuTapFloat( + FFXM_PARAMETER_INOUT FfxFloat32x3 accumulatedColor, // Accumulated color, with negative lobe. + FFXM_PARAMETER_INOUT FfxFloat32 accumulatedWeight, // Accumulated weight. + FFXM_PARAMETER_IN FfxFloat32x2 pixelOffset, // Pixel offset from resolve position to tap. + FFXM_PARAMETER_IN FfxFloat32x2 gradientDirection, // Gradient direction. + FFXM_PARAMETER_IN FfxFloat32x2 length, // Per-axis (anisotropic) scale applied to the rotated offset. + FFXM_PARAMETER_IN FfxFloat32 negativeLobeStrength, // Negative lobe strength. 
+ FFXM_PARAMETER_IN FfxFloat32 clippingPoint, // Clipping point (upper bound applied to distance^2). + FFXM_PARAMETER_IN FfxFloat32x3 color) // Tap color. +{ + // Rotate offset by direction. + FfxFloat32x2 rotatedOffset; + rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y); + rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x); + + // Anisotropy. + rotatedOffset *= length; + + // Compute distance^2. + FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y; + + // Limit to the window as at corner, 2 taps can easily be outside. + distanceSquared = ffxMin(distanceSquared, clippingPoint); + + // Approximation of Lanczos-2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0); + FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0); + weightB *= weightB; + weightA *= weightA; + weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0)); + FfxFloat32 weight = weightB * weightA; // 'base' term (weightB) times 'window' term (weightA). + + // Do weighted average. + accumulatedColor += color * weight; + accumulatedWeight += weight; +} + +// Accumulate direction and length. 
+void fsrEasuSetFloat( + FFXM_PARAMETER_INOUT FfxFloat32x2 direction, + FFXM_PARAMETER_INOUT FfxFloat32 length, + FFXM_PARAMETER_IN FfxFloat32x2 pp, + FFXM_PARAMETER_IN FfxBoolean biS, + FFXM_PARAMETER_IN FfxBoolean biT, + FFXM_PARAMETER_IN FfxBoolean biU, + FFXM_PARAMETER_IN FfxBoolean biV, + FFXM_PARAMETER_IN FfxFloat32 lA, + FFXM_PARAMETER_IN FfxFloat32 lB, + FFXM_PARAMETER_IN FfxFloat32 lC, + FFXM_PARAMETER_IN FfxFloat32 lD, + FFXM_PARAMETER_IN FfxFloat32 lE) +{ + // Compute bilinear weight; the branches factor out because the predicates are compile-time immediates. + // s t + // u v + FfxFloat32 weight = FfxFloat32(0.0); + if (biS) + weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y); + if (biT) + weight = pp.x * (FfxFloat32(1.0) - pp.y); + if (biU) + weight = (FfxFloat32(1.0) - pp.x) * pp.y; + if (biV) + weight = pp.x * pp.y; + + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes the magnitude from the larger absolute difference on either side of 'c' (max(|d-c|, |c-b|)). + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + FfxFloat32 dc = lD - lC; + FfxFloat32 cb = lC - lB; + FfxFloat32 lengthX = max(abs(dc), abs(cb)); + lengthX = ffxApproximateReciprocal(lengthX); + FfxFloat32 directionX = lD - lB; + direction.x += directionX * weight; + lengthX = ffxSaturate(abs(directionX) * lengthX); + lengthX *= lengthX; + length += lengthX * weight; + + // Repeat for the y axis. + FfxFloat32 ec = lE - lC; + FfxFloat32 ca = lC - lA; + FfxFloat32 lengthY = max(abs(ec), abs(ca)); + lengthY = ffxApproximateReciprocal(lengthY); + FfxFloat32 directionY = lE - lA; + direction.y += directionY * weight; + lengthY = ffxSaturate(abs(directionY) * lengthY); + lengthY *= lengthY; + length += lengthY * weight; +} + +/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations. +/// +/// @param [out] pix The computed color of a pixel. +/// @param [in] ip Integer pixel position within the output. 
+/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants. +/// +/// @ingroup FfxGPUFsr1 +void ffxFsrEasuFloat( + FFXM_PARAMETER_OUT FfxFloat32x3 pix, + FFXM_PARAMETER_IN FfxUInt32x2 ip, + FFXM_PARAMETER_IN FfxUInt32x4 con0, + FFXM_PARAMETER_IN FfxUInt32x4 con1, + FFXM_PARAMETER_IN FfxUInt32x4 con2, + FFXM_PARAMETER_IN FfxUInt32x4 con3) +{ + // Get position of 'f'. + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; // pp now holds only the fractional part of the position. + + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, + // a b <- unused (z) + // r g + // a b a b + // r g r g + // a b + // r g <- unused (z) + // Allowing dead-code removal to remove the 'z's. + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + + // These are from p0 to avoid pulling two constants on pre-Navi hardware. + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat32x4 bczzR = FsrEasuRF(p0); + FfxFloat32x4 bczzG = FsrEasuGF(p0); + FfxFloat32x4 bczzB = FsrEasuBF(p0); + FfxFloat32x4 ijfeR = FsrEasuRF(p1); + FfxFloat32x4 ijfeG = FsrEasuGF(p1); + FfxFloat32x4 ijfeB = FsrEasuBF(p1); + FfxFloat32x4 klhgR = FsrEasuRF(p2); + FfxFloat32x4 klhgG = FsrEasuGF(p2); + FfxFloat32x4 klhgB = FsrEasuBF(p2); + FfxFloat32x4 zzonR = FsrEasuRF(p3); + FfxFloat32x4 zzonG = FsrEasuGF(p3); + FfxFloat32x4 zzonB = FsrEasuBF(p3); + + // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). 
+ FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG); + FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG); + FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG); + FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG); + + // Rename: pull each tap's luma out of the gathered vectors (variable names spell the x,y,z,w order). + FfxFloat32 bL = bczzL.x; + FfxFloat32 cL = bczzL.y; + FfxFloat32 iL = ijfeL.x; + FfxFloat32 jL = ijfeL.y; + FfxFloat32 fL = ijfeL.z; + FfxFloat32 eL = ijfeL.w; + FfxFloat32 kL = klhgL.x; + FfxFloat32 lL = klhgL.y; + FfxFloat32 hL = klhgL.z; + FfxFloat32 gL = klhgL.w; + FfxFloat32 oL = zzonL.z; + FfxFloat32 nL = zzonL.w; + + // Accumulate for bilinear interpolation (one call per corner: centered on f, g, j, k). + FfxFloat32x2 dir = ffxBroadcast2(0.0); + FfxFloat32 len = FfxFloat32(0.0); + fsrEasuSetFloat(dir, len, pp, FFXM_TRUE, FFXM_FALSE, FFXM_FALSE, FFXM_FALSE, bL, eL, fL, gL, jL); + fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_TRUE, FFXM_FALSE, FFXM_FALSE, cL, fL, gL, hL, kL); + fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_FALSE, FFXM_TRUE, FFXM_FALSE, fL, iL, jL, kL, nL); + fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_FALSE, FFXM_FALSE, FFXM_TRUE, gL, jL, kL, lL, oL); + + // Normalize with approximation, and cleanup close to zero. + FfxFloat32x2 dir2 = dir * dir; + FfxFloat32 dirR = dir2.x + dir2.y; + FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0); // Squared gradient length too small to normalize. + dirR = ffxApproximateReciprocalSquareRoot(dirR); + dirR = zro ? FfxFloat32(1.0) : dirR; + dir.x = zro ? FfxFloat32(1.0) : dir.x; + dir *= ffxBroadcast2(dirR); + + // Transform from {0 to 2} to {0 to 1} range, and shape with square. + len = len * FfxFloat32(0.5); + len *= len; + + // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. 
+ FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y))); + + // Anisotropic length after rotation, + // x := 1.0 lerp to 'stretch' on edges + // y := 1.0 lerp to 2x on edges + FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len); + + // Based on the amount of 'edge', + // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. + FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len; + + // Set distance^2 clipping point to the end of the adjustable window. + FfxFloat32 clp = ffxApproximateReciprocal(lob); + + // Accumulation mixed with min/max of 4 nearest. + // b c + // e f g h + // i j k l + // n o + FfxFloat32x3 min4 = + ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), + FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + FfxFloat32x3 max4 = + max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + + // Accumulation. 
+ FfxFloat32x3 aC = ffxBroadcast3(0.0); + FfxFloat32 aW = FfxFloat32(0.0); + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n + + // Normalize and dering. 
+ pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW)))); // Divide by the total weight, then clamp to the min/max of the 4 nearest taps (f, g, j, k). +} +#endif // #if defined(FFXM_GPU) && defined(FFXM_FSR_EASU_FLOAT) + +#if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FFXM_FSR_EASU_HALF) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p); + +// This runs 2 taps in parallel (one tap per component of each 2-wide argument). +void FsrEasuTapH( + FFXM_PARAMETER_INOUT FfxFloat16x2 aCR, + FFXM_PARAMETER_INOUT FfxFloat16x2 aCG, + FFXM_PARAMETER_INOUT FfxFloat16x2 aCB, + FFXM_PARAMETER_INOUT FfxFloat16x2 aW, + FFXM_PARAMETER_IN FfxFloat16x2 offX, + FFXM_PARAMETER_IN FfxFloat16x2 offY, + FFXM_PARAMETER_IN FfxFloat16x2 dir, + FFXM_PARAMETER_IN FfxFloat16x2 len, + FFXM_PARAMETER_IN FfxFloat16 lob, + FFXM_PARAMETER_IN FfxFloat16 clp, + FFXM_PARAMETER_IN FfxFloat16x2 cR, + FFXM_PARAMETER_IN FfxFloat16x2 cG, + FFXM_PARAMETER_IN FfxFloat16x2 cB) +{ + FfxFloat16x2 vX, vY; + vX = offX * dir.xx + offY * dir.yy; + vY = offX * (-dir.yy) + offY * dir.xx; + vX *= len.x; + vY *= len.y; + FfxFloat16x2 d2 = vX * vX + vY * vY; + d2 = min(d2, FFXM_BROADCAST_FLOAT16X2(clp)); + FfxFloat16x2 wB = FFXM_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFXM_BROADCAST_FLOAT16X2(-1.0); + FfxFloat16x2 wA = FFXM_BROADCAST_FLOAT16X2(lob) * d2 + FFXM_BROADCAST_FLOAT16X2(-1.0); + wB *= wB; + wA *= wA; + wB = FFXM_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFXM_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0)); + FfxFloat16x2 w = wB * wA; + aCR += cR * w; + aCG += cG * w; + aCB += cB * w; + aW += w; +} + +// This runs 2 taps in parallel. 
+void FsrEasuSetH( + FFXM_PARAMETER_INOUT FfxFloat16x2 dirPX, + FFXM_PARAMETER_INOUT FfxFloat16x2 dirPY, + FFXM_PARAMETER_INOUT FfxFloat16x2 lenP, + FFXM_PARAMETER_IN FfxFloat16x2 pp, + FFXM_PARAMETER_IN FfxBoolean biST, + FFXM_PARAMETER_IN FfxBoolean biUV, + FFXM_PARAMETER_IN FfxFloat16x2 lA, + FFXM_PARAMETER_IN FfxFloat16x2 lB, + FFXM_PARAMETER_IN FfxFloat16x2 lC, + FFXM_PARAMETER_IN FfxFloat16x2 lD, + FFXM_PARAMETER_IN FfxFloat16x2 lE) +{ + FfxFloat16x2 w = FFXM_BROADCAST_FLOAT16X2(0.0); + + if (biST) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFXM_BROADCAST_FLOAT16X2(FFXM_BROADCAST_FLOAT16(1.0) - pp.y); + + if (biUV) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFXM_BROADCAST_FLOAT16X2(pp.y); + + // ABS is not free in the packed FP16 path. + FfxFloat16x2 dc = lD - lC; + FfxFloat16x2 cb = lC - lB; + FfxFloat16x2 lenX = max(abs(dc), abs(cb)); + lenX = ffxReciprocalHalf(lenX); + + FfxFloat16x2 dirX = lD - lB; + dirPX += dirX * w; + lenX = ffxSaturate(abs(dirX) * lenX); + lenX *= lenX; + lenP += lenX * w; + FfxFloat16x2 ec = lE - lC; + FfxFloat16x2 ca = lC - lA; + FfxFloat16x2 lenY = max(abs(ec), abs(ca)); + lenY = ffxReciprocalHalf(lenY); + FfxFloat16x2 dirY = lE - lA; + dirPY += dirY * w; + lenY = ffxSaturate(abs(dirY) * lenY); + lenY *= lenY; + lenP += lenY * w; +} + +void FsrEasuH( + FFXM_PARAMETER_OUT FfxFloat16x3 pix, + FFXM_PARAMETER_IN FfxUInt32x2 ip, + FFXM_PARAMETER_IN FfxUInt32x4 con0, + FFXM_PARAMETER_IN FfxUInt32x4 con1, + FFXM_PARAMETER_IN FfxUInt32x4 con2, + FFXM_PARAMETER_IN FfxUInt32x4 con3) +{ + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + FfxFloat16x2 ppp = FfxFloat16x2(pp); + + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat16x4 bczzR = FsrEasuRH(p0); + FfxFloat16x4 
bczzG = FsrEasuGH(p0); + FfxFloat16x4 bczzB = FsrEasuBH(p0); + FfxFloat16x4 ijfeR = FsrEasuRH(p1); + FfxFloat16x4 ijfeG = FsrEasuGH(p1); + FfxFloat16x4 ijfeB = FsrEasuBH(p1); + FfxFloat16x4 klhgR = FsrEasuRH(p2); + FfxFloat16x4 klhgG = FsrEasuGH(p2); + FfxFloat16x4 klhgB = FsrEasuBH(p2); + FfxFloat16x4 zzonR = FsrEasuRH(p3); + FfxFloat16x4 zzonG = FsrEasuGH(p3); + FfxFloat16x4 zzonB = FsrEasuBH(p3); + + FfxFloat16x4 bczzL = bczzB * FFXM_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFXM_BROADCAST_FLOAT16X4(0.5) + bczzG); + FfxFloat16x4 ijfeL = ijfeB * FFXM_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFXM_BROADCAST_FLOAT16X4(0.5) + ijfeG); + FfxFloat16x4 klhgL = klhgB * FFXM_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFXM_BROADCAST_FLOAT16X4(0.5) + klhgG); + FfxFloat16x4 zzonL = zzonB * FFXM_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFXM_BROADCAST_FLOAT16X4(0.5) + zzonG); + FfxFloat16 bL = bczzL.x; + FfxFloat16 cL = bczzL.y; + FfxFloat16 iL = ijfeL.x; + FfxFloat16 jL = ijfeL.y; + FfxFloat16 fL = ijfeL.z; + FfxFloat16 eL = ijfeL.w; + FfxFloat16 kL = klhgL.x; + FfxFloat16 lL = klhgL.y; + FfxFloat16 hL = klhgL.z; + FfxFloat16 gL = klhgL.w; + FfxFloat16 oL = zzonL.z; + FfxFloat16 nL = zzonL.w; + + // This part is different, accumulating 2 taps in parallel. 
+ FfxFloat16x2 dirPX = FFXM_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 dirPY = FFXM_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 lenP = FFXM_BROADCAST_FLOAT16X2(0.0); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(true), + FfxBoolean(false), + FfxFloat16x2(bL, cL), + FfxFloat16x2(eL, fL), + FfxFloat16x2(fL, gL), + FfxFloat16x2(gL, hL), + FfxFloat16x2(jL, kL)); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(false), + FfxBoolean(true), + FfxFloat16x2(fL, gL), + FfxFloat16x2(iL, jL), + FfxFloat16x2(jL, kL), + FfxFloat16x2(kL, lL), + FfxFloat16x2(nL, oL)); + FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); // Sum the two parallel lanes back into one direction. + FfxFloat16 len = lenP.r + lenP.g; + + FfxFloat16x2 dir2 = dir * dir; + FfxFloat16 dirR = dir2.x + dir2.y; + FfxUInt32 zro = FfxUInt32(dirR < FFXM_BROADCAST_FLOAT16(1.0 / 32768.0)); + dirR = ffxApproximateReciprocalSquareRootHalf(dirR); + dirR = (zro > 0) ? FFXM_BROADCAST_FLOAT16(1.0) : dirR; + dir.x = (zro > 0) ? FFXM_BROADCAST_FLOAT16(1.0) : dir.x; + dir *= FFXM_BROADCAST_FLOAT16X2(dirR); + len = len * FFXM_BROADCAST_FLOAT16(0.5); + len *= len; + FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); + FfxFloat16x2 len2 = + FfxFloat16x2(FFXM_BROADCAST_FLOAT16(1.0) + (stretch - FFXM_BROADCAST_FLOAT16(1.0)) * len, FFXM_BROADCAST_FLOAT16(1.0) + FFXM_BROADCAST_FLOAT16(-0.5) * len); + FfxFloat16 lob = FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; + FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); + + // FP16 is different, using packed trick to do min and max in same operation (x = max of the negated taps, i.e. -min; y = max). 
+ FfxFloat16x2 bothR = + max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); + FfxFloat16x2 bothG = + max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); + FfxFloat16x2 bothB = + max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); + + // This part is different for FP16, working pairs of taps at a time. + FfxFloat16x2 pR = FFXM_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pG = FFXM_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pB = FFXM_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pW = FFXM_BROADCAST_FLOAT16X2(0.0); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); // b, c + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); // i, j + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); // f, e + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); // k, l + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); // h, g + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); // o, n + FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); + FfxFloat16 aW = pW.x + pW.y; + + // Slightly different for FP16 version due to combined min and max. 
+ pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); +} +#endif // #if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FFXM_FSR_EASU_HALF) + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING +// +//------------------------------------------------------------------------------------------------------------------------------ +// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. +// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. +// RCAS also has a built in process to limit sharpening of what it detects as possible noise. +// RCAS sharper does not support scaling, as it should be applied after EASU scaling. +// Pass EASU output straight into RCAS, no color conversions necessary. +//------------------------------------------------------------------------------------------------------------------------------ +// RCAS is based on the following logic. +// RCAS uses a 5 tap filter in a cross pattern (same as CAS), +// w n +// w 1 w for taps w m e +// w s +// Where 'w' is the negative lobe weight. 
+// output = (w*(n+e+w+s)+m)/(4*w+1) +// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, +// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) +// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) +// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. +// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. +// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps. +// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. +// This stabilizes RCAS. +// RCAS does a simple highpass which is normalized against the local contrast then shaped, +// 0.25 +// 0.25 -1 0.25 +// 0.25 +// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. +// +// GLSL example for the required callbacks : +// +// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));} +// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) +// { +// //do any simple input color conversions here or leave empty if none needed +// } +// +// FsrRcasCon needs to be called from the CPU or GPU to set up constants. +// Including a GPU example here, the 'con' value would be stored out to a constant buffer. +// +// FfxUInt32x4 con; +// FsrRcasCon(con, +// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. +// --------------- +// RCAS sharpening supports a CAS-like pass-through alpha via, +// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 +// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+// Evaluates to 0.1875. The RCAS filters below clamp the negative lobe weight to no less than -FSR_RCAS_LIMIT
+// before scaling it by the sharpness constant.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+// Fills 'con' with the sharpening constants read by the RCAS filters below:
+//   con[0]         = bit pattern of the linear sharpness as a 32-bit float (FsrRcasF reads it back through con.x),
+//   con[1]         = the linear sharpness duplicated into a packed pair of 16-bit halves (FsrRcasH/FsrRcasHx2 read con.y),
+//   con[2], con[3] = 0 (not read by the filters in this section).
+// NOTE(review): 'con' carries no out/inout qualifier here, so the writes only reach the caller on targets where
+// FfxUInt32x4 is passed by reference (e.g. an array type) - confirm this for every target that calls this function.
+    FFXM_STATIC void FsrRcasCon(FfxUInt32x4 con,
+                                // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+                                FfxFloat32 sharpness)
+    {
+        // Transform from stops to linear value.
+        sharpness = exp2(-sharpness);
+        FfxFloat32x2 hSharp = {sharpness, sharpness};
+        con[0] = ffxAsUInt32(sharpness);
+        con[1] = packHalf2x16(hSharp);
+        con[2] = 0;
+        con[3] = 0;
+    }
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU)&&defined(FSR_RCAS_F)
+    // Input callback prototypes that need to be implemented by calling shader
+    FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p);
+    void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // 32-bit RCAS filter for one output pixel.
+    //  pixR/pixG/pixB : sharpened color of the pixel at 'ip'.
+    //  pixA           : only present with FSR_RCAS_PASSTHROUGH_ALPHA; alpha of the center tap, copied unmodified.
+    //  ip             : integer pixel position; taps are loaded at 'ip' and its 4 direct neighbors through FsrRcasLoadF().
+    //  con            : constants written by FsrRcasCon(); only con.x (sharpness as float bits) is read here.
+    void FsrRcasF(out FfxFloat32 pixR,  // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
+                  out FfxFloat32 pixG,
+                  out FfxFloat32 pixB,
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+                  out FfxFloat32 pixA,
+#endif
+                  FfxUInt32x2 ip,  // Integer pixel position in output.
+                  FfxUInt32x4 con)
+    {  // 'con' holds the constants generated by FsrRcasCon().
+        // Algorithm uses minimal 3x3 pixel neighborhood.
+        //    b
+        //  d e f
+        //    h
+        FfxInt32x2 sp = FfxInt32x2(ip);
+        FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
+        FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat32x4 ee = FsrRcasLoadF(sp);
+        FfxFloat32x3 e = ee.rgb;
+        pixA = ee.a;
+#else
+        FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
+#endif
+        FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
+        FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
+        // Rename (32-bit) or regroup (16-bit).
+        FfxFloat32 bR = b.r;
+        FfxFloat32 bG = b.g;
+        FfxFloat32 bB = b.b;
+        FfxFloat32 dR = d.r;
+        FfxFloat32 dG = d.g;
+        FfxFloat32 dB = d.b;
+        FfxFloat32 eR = e.r;
+        FfxFloat32 eG = e.g;
+        FfxFloat32 eB = e.b;
+        FfxFloat32 fR = f.r;
+        FfxFloat32 fG = f.g;
+        FfxFloat32 fB = f.b;
+        FfxFloat32 hR = h.r;
+        FfxFloat32 hG = h.g;
+        FfxFloat32 hB = h.b;
+        // Run optional input transform.
+        FsrRcasInputF(bR, bG, bB);
+        FsrRcasInputF(dR, dG, dB);
+        FsrRcasInputF(eR, eG, eB);
+        FsrRcasInputF(fR, fG, fB);
+        FsrRcasInputF(hR, hG, hB);
+        // Luma times 2 (0.5*B + 0.5*R + G).
+        FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
+        FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
+        FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
+        FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
+        FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
+        // Noise detection: highpass of the luma cross, normalized by the local luma range, then remapped to {0.5 to 1}.
+        // 'nz' is always computed but only applied when FSR_RCAS_DENOISE is defined (see below).
+        FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
+        nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
+        nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0);
+        // Min and max of ring (the 4 neighbors, center excluded).
+        FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
+        FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
+        FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
+        FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
+        FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
+        FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
+        // Immediate constants for peak range.
+        FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
+        FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
+        FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
+        FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
+        FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
+        FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+        FfxFloat32 lobeR = max(-hitMinR, hitMaxR);
+        FfxFloat32 lobeG = max(-hitMinG, hitMaxG);
+        FfxFloat32 lobeB = max(-hitMinB, hitMaxB);
+        // Clamp the lobe to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness stored as float bits in con.x.
+        FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
+        (con.x);
+        // Apply noise removal.
+#ifdef FSR_RCAS_DENOISE
+        lobe *= nz;
+#endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
+        pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+        pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+        pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+        return;
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                  NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FSR_RCAS_H)
+    // Input callback prototypes that need to be implemented by calling shader
+    FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p);
+    void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // 16-bit RCAS filter for one output pixel; same steps as FsrRcasF but with half-precision types and helpers.
+    //  pixR/pixG/pixB : sharpened color of the pixel at 'ip'.
+    //  pixA           : only present with FSR_RCAS_PASSTHROUGH_ALPHA; alpha of the center tap, copied unmodified.
+    //  ip             : integer pixel position; taps are loaded at 'ip' and its 4 direct neighbors through FsrRcasLoadH().
+    //  con            : constants written by FsrRcasCon(); only con.y (sharpness as packed halves) is read here.
+    void FsrRcasH(
+        out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
+        out FfxFloat16 pixG,
+        out FfxFloat16 pixB,
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        out FfxFloat16 pixA,
+        #endif
+        FfxUInt32x2 ip, // Integer pixel position in output.
+        FfxUInt32x4 con){ // Constants generated by FsrRcasCon().
+        // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
+        //    b
+        //  d e f
+        //    h
+        FfxInt16x2 sp=FfxInt16x2(ip);
+        FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee=FsrRcasLoadH(sp);
+        FfxFloat16x3 e=ee.rgb;pixA=ee.a;
+        #else
+        FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
+        #endif
+        FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
+        // Rename (32-bit) or regroup (16-bit).
+        FfxFloat16 bR=b.r;
+        FfxFloat16 bG=b.g;
+        FfxFloat16 bB=b.b;
+        FfxFloat16 dR=d.r;
+        FfxFloat16 dG=d.g;
+        FfxFloat16 dB=d.b;
+        FfxFloat16 eR=e.r;
+        FfxFloat16 eG=e.g;
+        FfxFloat16 eB=e.b;
+        FfxFloat16 fR=f.r;
+        FfxFloat16 fG=f.g;
+        FfxFloat16 fB=f.b;
+        FfxFloat16 hR=h.r;
+        FfxFloat16 hG=h.g;
+        FfxFloat16 hB=h.b;
+        // Run optional input transform.
+        FsrRcasInputH(bR,bG,bB);
+        FsrRcasInputH(dR,dG,dB);
+        FsrRcasInputH(eR,eG,eB);
+        FsrRcasInputH(fR,fG,fB);
+        FsrRcasInputH(hR,hG,hB);
+        // Luma times 2 (0.5*B + 0.5*R + G).
+        FfxFloat16 bL=bB*FFXM_BROADCAST_FLOAT16(0.5)+(bR*FFXM_BROADCAST_FLOAT16(0.5)+bG);
+        FfxFloat16 dL=dB*FFXM_BROADCAST_FLOAT16(0.5)+(dR*FFXM_BROADCAST_FLOAT16(0.5)+dG);
+        FfxFloat16 eL=eB*FFXM_BROADCAST_FLOAT16(0.5)+(eR*FFXM_BROADCAST_FLOAT16(0.5)+eG);
+        FfxFloat16 fL=fB*FFXM_BROADCAST_FLOAT16(0.5)+(fR*FFXM_BROADCAST_FLOAT16(0.5)+fG);
+        FfxFloat16 hL=hB*FFXM_BROADCAST_FLOAT16(0.5)+(hR*FFXM_BROADCAST_FLOAT16(0.5)+hG);
+        // Noise detection; 'nz' is only applied when FSR_RCAS_DENOISE is defined (see below).
+        FfxFloat16 nz=FFXM_BROADCAST_FLOAT16(0.25)*bL+FFXM_BROADCAST_FLOAT16(0.25)*dL+FFXM_BROADCAST_FLOAT16(0.25)*fL+FFXM_BROADCAST_FLOAT16(0.25)*hL-eL;
+        nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+        nz=FFXM_BROADCAST_FLOAT16(-0.5)*nz+FFXM_BROADCAST_FLOAT16(1.0);
+        // Min and max of ring (the 4 neighbors, center excluded).
+        FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+        FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+        FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+        FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+        FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+        FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+        // Immediate constants for peak range.
+        FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4R);
+        FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4G);
+        FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4B);
+        FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
+        FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
+        FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
+        FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
+        FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
+        FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
+        // Clamp the lobe to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness unpacked from the halves in con.y.
+        FfxFloat16 lobe=max(FFXM_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFXM_BROADCAST_FLOAT16(0.0)))*FFXM_UINT32_TO_FLOAT16X2(con.y).x;
+        // Apply noise removal.
+        #ifdef FSR_RCAS_DENOISE
+        lobe*=nz;
+        #endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFXM_BROADCAST_FLOAT16(4.0)*lobe+FFXM_BROADCAST_FLOAT16(1.0));
+        pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+        pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+        pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1 && defined(FSR_RCAS_HX2)
+    // Input callback prototypes that need to be implemented by the calling shader
+    FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p);
+    void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+    // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store.
+    // pix0.rgb receives the .x lanes of pixR/pixG/pixB and pix1.rgb the .y lanes.
+    // Alpha is written (as 0.0) only when FFXM_HLSL is defined; otherwise pix0.a/pix1.a are not assigned here.
+    void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){
+        #ifdef FFXM_HLSL
+        // Invoke a slower path for DX only, since it won't allow uninitialized values.
+        pix0.a=pix1.a=0.0;
+        #endif
+        pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x);
+        pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);}
+//------------------------------------------------------------------------------------------------------------------------------
+    // Packed 16-bit RCAS filter: sharpens the pixel at 'ip' (lane .x) and the pixel at 'ip + {8,0}' (lane .y) together.
+    //  pixA : only present with FSR_RCAS_PASSTHROUGH_ALPHA; center-tap alpha of each of the two pixels, copied unmodified.
+    //  con  : constants written by FsrRcasCon(); only con.y (sharpness as packed halves) is read here.
+    void FsrRcasHx2(
+        // Output values are for 2 8x8 tiles in a 16x8 region.
+        //  pix<R,G,B>.x = left 8x8 tile
+        //  pix<R,G,B>.y = right 8x8 tile
+        // This enables later processing to easily be packed as well.
+        out FfxFloat16x2 pixR,
+        out FfxFloat16x2 pixG,
+        out FfxFloat16x2 pixB,
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        out FfxFloat16x2 pixA,
+        #endif
+        FfxUInt32x2 ip, // Integer pixel position in output.
+        FfxUInt32x4 con){ // Constants generated by FsrRcasCon().
+        // No scaling algorithm uses minimal 3x3 pixel neighborhood.
+        // Taps for the first pixel (suffix 0) are centered on 'ip'.
+        FfxInt16x2 sp0=FfxInt16x2(ip);
+        FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
+        FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
+        #else
+        FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
+        #endif
+        FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
+        // Taps for the second pixel (suffix 1) are centered 8 pixels to the right.
+        FfxInt16x2 sp1=sp0+FfxInt16x2(8,0);
+        FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
+        FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
+        #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+        FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
+        FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
+        #else
+        FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
+        #endif
+        FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
+        FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
+        // Arrays of Structures to Structures of Arrays conversion.
+        FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
+        FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
+        FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
+        FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
+        FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
+        FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
+        FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
+        FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
+        FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
+        FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
+        FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
+        FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
+        FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
+        FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
+        FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
+        // Run optional input transform.
+        FsrRcasInputHx2(bR,bG,bB);
+        FsrRcasInputHx2(dR,dG,dB);
+        FsrRcasInputHx2(eR,eG,eB);
+        FsrRcasInputHx2(fR,fG,fB);
+        FsrRcasInputHx2(hR,hG,hB);
+        // Luma times 2 (0.5*B + 0.5*R + G).
+        FfxFloat16x2 bL=bB*FFXM_BROADCAST_FLOAT16X2(0.5)+(bR*FFXM_BROADCAST_FLOAT16X2(0.5)+bG);
+        FfxFloat16x2 dL=dB*FFXM_BROADCAST_FLOAT16X2(0.5)+(dR*FFXM_BROADCAST_FLOAT16X2(0.5)+dG);
+        FfxFloat16x2 eL=eB*FFXM_BROADCAST_FLOAT16X2(0.5)+(eR*FFXM_BROADCAST_FLOAT16X2(0.5)+eG);
+        FfxFloat16x2 fL=fB*FFXM_BROADCAST_FLOAT16X2(0.5)+(fR*FFXM_BROADCAST_FLOAT16X2(0.5)+fG);
+        FfxFloat16x2 hL=hB*FFXM_BROADCAST_FLOAT16X2(0.5)+(hR*FFXM_BROADCAST_FLOAT16X2(0.5)+hG);
+        // Noise detection; 'nz' is only applied when FSR_RCAS_DENOISE is defined (see below).
+        FfxFloat16x2 nz=FFXM_BROADCAST_FLOAT16X2(0.25)*bL+FFXM_BROADCAST_FLOAT16X2(0.25)*dL+FFXM_BROADCAST_FLOAT16X2(0.25)*fL+FFXM_BROADCAST_FLOAT16X2(0.25)*hL-eL;
+        nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+        nz=FFXM_BROADCAST_FLOAT16X2(-0.5)*nz+FFXM_BROADCAST_FLOAT16X2(1.0);
+        // Min and max of ring (the 4 neighbors, center excluded).
+        FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+        FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+        FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+        FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+        FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+        FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+        // Immediate constants for peak range.
+        FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+        // Limiters, these need to be high precision RCPs.
+        FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4R);
+        FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4G);
+        FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4B);
+        FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
+        FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
+        FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
+        FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
+        FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
+        FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
+        // Clamp the lobe to {-FSR_RCAS_LIMIT to 0}, then scale both lanes by the sharpness unpacked from con.y.
+        FfxFloat16x2 lobe=max(FFXM_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFXM_BROADCAST_FLOAT16X2(0.0)))*FFXM_BROADCAST_FLOAT16X2(FFXM_UINT32_TO_FLOAT16X2(con.y).x);
+        // Apply noise removal.
+        #ifdef FSR_RCAS_DENOISE
+        lobe*=nz;
+        #endif
+        // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+        FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*lobe+FFXM_BROADCAST_FLOAT16X2(1.0));
+        pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+        pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+        pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+//   FsrLfga*(
+//    color, // In/out linear colorspace color {0 to 1} ranged.
+//    grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+//    amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+    // Adds grain 't' scaled by amount 'a' to color 'c' in place.
+    // The grain is attenuated by the distance from 'c' to the nearer signal limit (0 or 1).
+    void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a)
+    {
+        // Distance to the closest end of the {0 to 1} range, per channel.
+        FfxFloat32x3 headroom = ffxMin(ffxBroadcast3(1.0) - c, c);
+        FfxFloat32x3 scaledGrain = t * ffxBroadcast3(a);
+        c += scaledGrain * headroom;
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1
+    // Half precision variant of FsrLfgaF (slower than the packed variant below).
+    void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a)
+    {
+        // Distance to the closest end of the {0 to 1} range, per channel.
+        FfxFloat16x3 headroom = min(FFXM_BROADCAST_FLOAT16X3(1.0) - c, c);
+        FfxFloat16x3 scaledGrain = t * FFXM_BROADCAST_FLOAT16X3(a);
+        c += scaledGrain * headroom;
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed half precision variant (faster): each of cR/cG/cB and tR/tG/tB carries one channel of two pixels.
+    void FsrLfgaHx2(inout FfxFloat16x2 cR,
+                    inout FfxFloat16x2 cG,
+                    inout FfxFloat16x2 cB,
+                    FfxFloat16x2 tR,
+                    FfxFloat16x2 tG,
+                    FfxFloat16x2 tB,
+                    FfxFloat16 a)
+    {
+        // Red.
+        FfxFloat16x2 headroomR = min(FFXM_BROADCAST_FLOAT16X2(1.0) - cR, cR);
+        FfxFloat16x2 scaledGrainR = tR * FFXM_BROADCAST_FLOAT16X2(a);
+        cR += scaledGrainR * headroomR;
+        // Green.
+        FfxFloat16x2 headroomG = min(FFXM_BROADCAST_FLOAT16X2(1.0) - cG, cG);
+        FfxFloat16x2 scaledGrainG = tG * FFXM_BROADCAST_FLOAT16X2(a);
+        cG += scaledGrainG * headroomG;
+        // Blue.
+        FfxFloat16x2 headroomB = min(FFXM_BROADCAST_FLOAT16X2(1.0) - cB, cB);
+        FfxFloat16x2 scaledGrainB = tB * FFXM_BROADCAST_FLOAT16X2(a);
+        cB += scaledGrainB * headroomB;
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+//  FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+    // Forward tonemap: divides 'c' in place by (largest channel + 1).
+    void FsrSrtmF(inout FfxFloat32x3 c)
+    {
+        FfxFloat32 peak = ffxMax3(c.r, c.g, c.b);
+        c *= ffxBroadcast3(rcp(peak + FfxFloat32(1.0)));
+    }
+    // Inverse tonemap: divides 'c' in place by (1 - largest channel).
+    // The divisor is floored at 1/32768, which solves the c=1.0 case (otherwise a /0).
+    void FsrSrtmInvF(inout FfxFloat32x3 c)
+    {
+        FfxFloat32 divisor = max(FfxFloat32(1.0/32768.0), FfxFloat32(1.0) - ffxMax3(c.r, c.g, c.b));
+        c *= ffxBroadcast3(rcp(divisor));
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU )&& FFXM_HALF == 1
+    // Half precision forward tonemap: divides 'c' in place by (largest channel + 1).
+    void FsrSrtmH(inout FfxFloat16x3 c)
+    {
+        FfxFloat16 peak = ffxMax3Half(c.r, c.g, c.b);
+        c *= FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(peak + FFXM_BROADCAST_FLOAT16(1.0)));
+    }
+    // Half precision inverse tonemap; the divisor is floored at 1/32768 to avoid dividing by zero.
+    void FsrSrtmInvH(inout FfxFloat16x3 c)
+    {
+        FfxFloat16 divisor = max(FFXM_BROADCAST_FLOAT16(1.0 / 32768.0), FFXM_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b));
+        c *= FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(divisor));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Packed half precision forward tonemap: cR/cG/cB each carry one channel of two pixels.
+    void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
+    {
+        // Per-pixel scale factor 1 / (largest channel + 1), shared by all three channels.
+        FfxFloat16x2 scale = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFXM_BROADCAST_FLOAT16X2(1.0));
+        cR *= scale;
+        cG *= scale;
+        cB *= scale;
+    }
+    // Packed half precision inverse tonemap; the divisor is floored at 1/32768 to avoid dividing by zero.
+    void FsrSrtmInvHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
+    {
+        FfxFloat16x2 scale = ffxReciprocalHalf(max(FFXM_BROADCAST_FLOAT16X2(1.0/32768.0), FFXM_BROADCAST_FLOAT16X2(1.0) - ffxMax3Half(cR, cG, cB)));
+        cR *= scale;
+        cG *= scale;
+        cB *= scale;
+    }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(FFXM_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this computation. + // Output is {0 to <1}.
+    FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        // The 1.61803 golden ratio.
+        FfxFloat32 golden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        // Number designed to provide a good visual pattern.
+        FfxFloat32 rowScale = FfxFloat32(1.0 / 3.69);
+        // 'f' is added to the x position before conversion to float.
+        FfxFloat32 column = FfxFloat32(p.x + f);
+        FfxFloat32 row = FfxFloat32(p.y);
+        // Keep only the fractional part of the weighted sum.
+        return ffxFract(column * golden + (row * rowScale));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // This version is 8-bit gamma 2.0.
+    // The 'c' input is {0 to 1}.
+    // Output is {0 to 1} ready for image store.
+    // 'dit' is the dither value that decides, per channel, between the two neighboring 8-bit codes.
+    void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
+    {
+        // One 8-bit code step.
+        FfxFloat32x3 quantum = ffxBroadcast3(1.0 / 255.0);
+        // Gamma 2.0 encode, then snap down to the 8-bit code at or below the input.
+        FfxFloat32x3 lowCode = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * quantum;
+        FfxFloat32x3 highCode = lowCode + quantum;
+        // Both candidate codes squared back to linear.
+        FfxFloat32x3 lowLinear = lowCode * lowCode;
+        FfxFloat32x3 highLinear = highCode * highCode;
+        // Ratio of 'lowLinear' to 'highLinear' required to produce 'c'.
+        // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges).
+        // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL.
+        FfxFloat32x3 ratio = (c - highLinear) * ffxApproximateReciprocalMedium(lowLinear - highLinear);
+        // Use the ratio as a cutoff to choose the low or the high code.
+        // ffxIsGreaterThanZero() is a MUL.
+        c = ffxSaturate(lowCode + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * quantum);
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // This version is 10-bit gamma 2.0.
+    // The 'c' input is {0 to 1}.
+    // Output is {0 to 1} ready for image store.
+    // 10-bit dither quantizer: square-root encodes 'c', floors it to a multiple of 1/1023,
+    // then uses 'dit' against the interpolation ratio 'r' to choose between that level
+    // and the next one up. The result written back to 'c' is saturated.
+    void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
+    {
+        FfxFloat32x3 n = ffxSqrt(c);
+        n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0);
+        // 'a' and 'b' are the lower and upper candidate levels squared back into the domain of 'c'.
+        FfxFloat32x3 a = n * n;
+        FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0);
+        b = b * b;
+        FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b);
+        c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0));
+    }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1
+    // Half-precision dither value for position 'p' with the x coordinate offset by 'f'.
+    // The arithmetic is carried out in 32-bit floats; only the final ffxFract() result
+    // is converted to FfxFloat16.
+    FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        FfxFloat32 x = FfxFloat32(p.x + f);
+        FfxFloat32 y = FfxFloat32(p.y);
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69);
+        x = x * a + (y * b);
+        return FfxFloat16(ffxFract(x));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // 16-bit variant of the 8-bit (255-level) dither quantizer; same steps as the
+    // 32-bit version but using sqrt() and the *Half helper functions.
+    void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit)
+    {
+        FfxFloat16x3 n = sqrt(c);
+        n = floor(n * FFXM_BROADCAST_FLOAT16X3(255.0)) * FFXM_BROADCAST_FLOAT16X3(1.0 / 255.0);
+        FfxFloat16x3 a = n * n;
+        FfxFloat16x3 b = n + FFXM_BROADCAST_FLOAT16X3(1.0 / 255.0);
+        b = b * b;
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b);
+        c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFXM_BROADCAST_FLOAT16X3(dit) - r) * FFXM_BROADCAST_FLOAT16X3(1.0 / 255.0));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // 16-bit variant of the 10-bit (1023-level) dither quantizer.
+    void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
+    {
+        FfxFloat16x3 n = sqrt(c);
+        n = floor(n * FFXM_BROADCAST_FLOAT16X3(1023.0)) * FFXM_BROADCAST_FLOAT16X3(1.0 / 1023.0);
+        FfxFloat16x3 a = n * n;
+        FfxFloat16x3 b = n + FFXM_BROADCAST_FLOAT16X3(1.0 / 1023.0);
+        b = b * b;
+        FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b);
+        c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFXM_BROADCAST_FLOAT16X3(dit) - r) * FFXM_BROADCAST_FLOAT16X3(1.0 / 1023.0));
+    }
+    //==============================================================================================================================
+    // This computes dither for positions 'p' and 'p+{8,0}'.
+    // The two x coordinates are evaluated together as a 2-vector and share the same y term.
+    FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
+    {
+        FfxFloat32x2 x;
+        x.x = FfxFloat32(p.x + f);
+        x.y = x.x + FfxFloat32(8.0);
+        FfxFloat32 y = FfxFloat32(p.y);
+        FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+        FfxFloat32 b = FfxFloat32(1.0 / 3.69);
+        x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b);
+        return FfxFloat16x2(ffxFract(x));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Two-pixel 8-bit dither quantizer. Each of cR/cG/cB carries one colour channel for
+    // two pixels, and 'dit' carries the two matching dither values; the per-channel steps
+    // are the same as in the single-pixel version.
+    void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+    {
+        FfxFloat16x2 nR = sqrt(cR);
+        FfxFloat16x2 nG = sqrt(cG);
+        FfxFloat16x2 nB = sqrt(cB);
+        nR = floor(nR * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        nG = floor(nG * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        nB = floor(nB * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        FfxFloat16x2 aR = nR * nR;
+        FfxFloat16x2 aG = nG * nG;
+        FfxFloat16x2 aB = nB * nB;
+        FfxFloat16x2 bR = nR + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bR = bR * bR;
+        FfxFloat16x2 bG = nG + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bG = bG * bG;
+        FfxFloat16x2 bB = nB + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+        bB = bB * bB;
+        FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR);
+        FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
+        FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
+        cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0));
+        cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0));
+        cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0));
+    }
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Two-pixel 10-bit dither quantizer; same structure as FsrTepdC8Hx2 with 1023 levels.
+    void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){
+        FfxFloat16x2 nR=sqrt(cR);
+        FfxFloat16x2 nG=sqrt(cG);
+        FfxFloat16x2 nB=sqrt(cB);
+        nR=floor(nR*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);
+        nG=floor(nG*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);
+        nB=floor(nB*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);
+        FfxFloat16x2 aR=nR*nR;
+        FfxFloat16x2 aG=nG*nG;
+        FfxFloat16x2 aB=nB*nB;
+        FfxFloat16x2 bR=nR+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR;
+        FfxFloat16x2 bG=nG+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG;
+        FfxFloat16x2 bB=nB+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB;
+        FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR);
+        FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG);
+        FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB);
+        cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0));
+        cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0));
+        cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFXM_BROADCAST_FLOAT16X2(1.0 / 1023.0));
+}
+#endif
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta
new file mode 100644
index 0000000..0ceaf34
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta
@@ -0,0 +1,76 @@
+fileFormatVersion: 2
+guid: beffdc3cffeabd84491ac83b32a4d9f8 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Android: Android + second: + enabled: 0 + settings: + AndroidSharedLibraryType: Executable + CPU: ARMv7 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + CPU: AnyCPU + DefaultValueInitialized: true + OS: AnyOS + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta new file mode 100644 index 0000000..1656967 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 8001d8b2af47f59409d886267a5ce04b +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h new file mode 100644 index 0000000..3cd15ae --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -0,0 +1,380 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#ifndef FFXM_FSR2_ACCUMULATE_H
+#define FFXM_FSR2_ACCUMULATE_H
+
+// Per-pixel results of the accumulation pass. Which colour members exist is decided at
+// compile time: colour and temporal-reactive factor are either packed into one 4-vector
+// or kept separate, and fColor is only present when sharpening is disabled.
+struct AccumulateOutputs
+{
+#if !FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+    FfxFloat32x4 fColorAndWeight;
+#else
+    FfxFloat32x3 fUpscaledColor;
+    FfxFloat32   fTemporalReactive;
+#endif
+    FfxFloat32x2 fLockStatus;
+    FfxFloat32x4 fLumaHistory;
+#if (FFXM_FSR2_OPTION_APPLY_SHARPENING == 0)
+    FfxFloat32x3 fColor;
+#endif
+};
+
+// Returns the length of the motion vector after scaling it by DisplaySize().
+// NOTE(review): presumably the motion vector is in UV units, making the result a
+// velocity in display-resolution pixels - confirm against GetMotionVector().
+FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector)
+{
+    return length(fMotionVector * DisplaySize());
+}
+#if FFXM_HALF
+// Reduced-precision overload of GetPxHrVelocity().
+FFXM_MIN16_F GetPxHrVelocity(FFXM_MIN16_F2 fMotionVector)
+{
+    return length(fMotionVector * FFXM_MIN16_F2(DisplaySize()));
+}
+#endif
+
+// Blends the upsampled colour into fHistoryColor.
+//   fHistoryColor            - in/out: history colour, replaced by the blended colour.
+//   fAccumulation            - accumulated weight; passed by value, so the sum computed
+//                              below is not visible to the caller.
+//   fUpsampledColorAndWeight - xyz: upsampled colour, w: its weight.
+// 'params' is not referenced in the body.
+void Accumulate(const AccumulationPassCommonParams params, FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFXM_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
+{
+    // Avoid invalid values when accumulation and upsampled weight is 0
+    fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = Tonemap(fHistoryColor);
+#else
+    //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation)
+    fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
+    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
+#endif
+#endif
+
+    // Blend factor: share of the total weight contributed by the new sample.
+    const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation;
+    fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);
+
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = YCoCgToRGB(fHistoryColor);
+#endif
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+    fHistoryColor = InverseTonemap(fHistoryColor);
+#endif
+}
+
+// Pulls the history colour towards a clipping box built from 'clippingBox' and reduces
+// the accumulation weight when the history falls outside that box.
+//   fHistoryColor / fAccumulation - in/out, only modified when the history is outside the box.
+//   fLockContributionThisFrame, fLumaInstabilityFactor - their maximum decides how much of the
+//                              unclamped history is kept.
+// The two signatures differ only in the box type (min16 when FFXM_HALF is set).
+// NOTE(review): fTemporalReactiveFactor, boxCenter and boxVecSize are not used in the body,
+// and length() is applied to a scalar product here.
+#if FFXM_HALF
+void RectifyHistory(
+    const AccumulationPassCommonParams params,
+    RectificationBoxMin16 clippingBox,
+    FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
+    FFXM_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
+    FfxFloat32 fLockContributionThisFrame,
+    FfxFloat32 fTemporalReactiveFactor,
+    FfxFloat32 fLumaInstabilityFactor)
+#else
+void RectifyHistory(
+    const AccumulationPassCommonParams params,
+    RectificationBox clippingBox,
+    FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
+    FFXM_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
+    FfxFloat32 fLockContributionThisFrame,
+    FfxFloat32 fTemporalReactiveFactor,
+    FfxFloat32 fLumaInstabilityFactor)
+#endif
+{
+    FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));
+
+    // The box scale moves from fScaleFactorInfluence towards 1.0 as depth clip,
+    // accumulation mask or velocity (saturated at 20) increase.
+    const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
+    const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor));
+    FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);
+
+    FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
+    FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
+    FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
+    FfxFloat32x3 boxCenter = clippingBox.boxCenter;
+    FfxFloat32 boxVecSize = length(clippingBox.boxVec);
+
+    // Never let the scaled box exceed the AABB stored in the rectification box.
+    boxMin = ffxMax(clippingBox.aabbMin, boxMin);
+    boxMax = ffxMin(clippingBox.aabbMax, boxMax);
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    boxMin = InverseTonemap(boxMin);
+    boxMax = InverseTonemap(boxMax);
+#endif
+
+    if (any(FFXM_GREATER_THAN(boxMin, fHistoryColor)) || any(FFXM_GREATER_THAN(fHistoryColor, boxMax))) {
+
+        const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);
+
+        FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
+
+        // A higher dilated reactive factor lowers the share of unclamped history kept.
+        const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
+        const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f);
+        fHistoryContribution *= fReactiveContribution;
+
+        // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
+        fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution));
+
+        // Scale accumulation using rectification info
+        const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFXM_BROADCAST_FLOAT32X3(0.1f));
+        fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
+    }
+}
+
+// Writes the lock status for this pixel into result.fLockStatus. The lock is killed when
+// the estimated next-frame UV falls outside the screen; otherwise its remaining lifetime
+// is reduced (never below zero).
+void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight, FFXM_PARAMETER_INOUT AccumulateOutputs result)
+{
+    // we expect similar motion for next frame
+    // kill lock if that location is outside screen, avoid locks to be clamped to screen borders
+    FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
+    if (IsUvInside(fEstimatedUvNextFrame) == false) {
+        KillLock(fLockStatus);
+    }
+    else {
+        // Decrease lock lifetime
+        const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
+        const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax);
+        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease);
+    }
+
+    result.fLockStatus = fLockStatus;
+}
+
+
+// Returns the base accumulation weight (same value in all three components) before
+// rectification. It starts from the maximum Lanczos weight and is reduced for missing
+// history samples, reactive pixels, depth clipping and motion.
+// 'lockState' is not referenced in the body.
+FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
+{
+    // Always assume max accumulation was reached
+    FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
+
+    // ffxMin() keeps these two steps from ever raising the weight.
+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)))));
+
+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20))));
+
+    return fBaseAccumulation.xxx;
+}
+
+// Compares the current luma (clippingBox.boxCenter.x, quantized to 1/255 steps) with up
+// to four previous luma values and returns an instability factor. Also shifts the history
+// by one entry and stores it in result.fLumaHistory.
+// Reads params.bIsNewSample, so initIsNewSample() has to run before this function.
+// The returned factor is forced to 0 while the oldest history slot is still 0.
+#if FFXM_HALF
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBoxMin16 clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff, FFXM_PARAMETER_INOUT AccumulateOutputs result)
+#else
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff, FFXM_PARAMETER_INOUT AccumulateOutputs result)
+#endif
+{
+    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;
+    // Indices into the 4-component luma history; index 0 is the most recent frame.
+    const FfxInt32 N_MINUS_1 = 0;
+    const FfxInt32 N_MINUS_2 = 1;
+    const FfxInt32 N_MINUS_3 = 2;
+    const FfxInt32 N_MINUS_4 = 3;
+
+    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
+#endif
+
+    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;
+
+    // The history is only sampled when depth clip, accumulation mask and luminance difference
+    // are all below 0.1 and the sample is not new; otherwise an all-zero history is used.
+    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
+    FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFXM_BROADCAST_FLOAT32X4(0.0f);
+
+    FfxFloat32 fLumaInstability = 0.0f;
+    FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
+
+    FfxFloat32 fMin = abs(fDiffs0);
+
+    if (fMin >= fUnormThreshold) {
+        // Look for an older frame whose luma is closer to the current one than the
+        // previous frame's, on the same side (same sign of difference).
+        for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
+            FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
+
+            if (sign(fDiffs0) == sign(fDiffs1)) {
+
+                // Scale difference to protect historically similar values
+                const FfxFloat32 fMinBias = 1.0f;
+                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
+            }
+        }
+
+        const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
+        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
+
+        // Non-zero only when an older frame was a closer match than the previous frame;
+        // the second statement turns the value into a 0/1 flag.
+        fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
+        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);
+
+        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
+    }
+
+    //shift history
+    fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
+    fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
+    fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
+    fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
+
+    result.fLumaHistory = fCurrentFrameLumaHistory;
+
+    return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
+}
+
+// Derives the temporal reactive factor to store for this pixel from this frame's factor.
+// New samples get 1.0. When saturate(velocity * 10) reaches 1 the value is returned
+// negated (and at least FSR2_EPSILON in magnitude).
+// NOTE(review): the sign presumably encodes "in motion" for the next frame's
+// bInMotionLastFrame - confirm against ReprojectHistoryColor().
+FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
+{
+    FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor);
+
+    fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity)));
+
+    fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor));
+
+    // Force reactive factor for new samples
+    fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;
+
+    if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) {
+        fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f;
+    }
+
+    return fNewFactor;
+}
+
+// Fills params.fDilatedReactiveFactor (x) and params.fAccumulationMask (y) from the
+// dilated reactive masks sampled at params.fLrUv_HwSampler.
+void initReactiveMaskFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params)
+{
+    const FFXM_MIN16_F2 fDilatedReactiveMasks = FFXM_MIN16_F2(SampleDilatedReactiveMasks(params.fLrUv_HwSampler));
+    params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
+    params.fAccumulationMask = fDilatedReactiveMasks.y;
+}
+
+// Fills params.fDepthClipFactor with the saturated depth-clip sample at params.fLrUv_HwSampler.
+void initDepthClipFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params)
+{
+    params.fDepthClipFactor = FFXM_MIN16_F(ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)));
+}
+
+// Sets params.bIsNewSample: true when there is no existing sample or FrameIndex() is 0.
+void initIsNewSample(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params)
+{
+    const FfxBoolean bIsResetFrame = (0 == FrameIndex());
+    params.bIsNewSample = (params.bIsExistingSample == false || bIsResetFrame);
+}
+
+
+// Builds the common parameters for output pixel iPxHrPos: pixel-centre UV, jittered and
+// clamped sampling UV, motion vector and velocity, and the reprojected UV together with
+// the existing-sample flag. The reactive, depth-clip and new-sample fields are filled
+// later by the init* helpers above.
+AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos)
+{
+    AccumulationPassCommonParams params;
+
+    params.iPxHrPos = iPxHrPos;
+    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
+    params.fHrUv = fHrUv;
+
+    const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
+    params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
+
+    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
+    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
+
+    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);
+
+    return params;
+}
+
+// Entry point of the accumulation pass for one output pixel. In order: reprojects history
+// (when available and not on a reset frame), updates the lock status, upsamples the
+// current frame, rectifies and blends the history, and returns the values to be stored.
+AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos)
+{
+    AccumulationPassCommonParams params = InitParams(iPxHrPos);
+
+    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
+    FFXM_MIN16_F2 fLockStatus;
+    InitializeNewLockSample(fLockStatus);
+
+    FFXM_MIN16_F fTemporalReactiveFactor = FFXM_MIN16_F(0.0f);
+    FfxBoolean bInMotionLastFrame = FFXM_FALSE;
+    LockState lockState = { FFXM_FALSE , FFXM_FALSE };
+    const FfxBoolean bIsResetFrame = (0 == FrameIndex());
+    if (params.bIsExistingSample && !bIsResetFrame) {
+        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
+        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
+    }
+
+    initReactiveMaskFactors(params);
+    initDepthClipFactors(params);
+
+    FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
+
+    FfxFloat32 fLuminanceDiff = 0.0f;
+    FfxFloat32 fLockContributionThisFrame = 0.0f;
+    // UpdateLockStatus() is called with a 32-bit copy of the lock status, which is
+    // converted back to the min16 type afterwards.
+    FfxFloat32x2 fLockStatus32 = {fLockStatus.x, fLockStatus.y};
+    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus32, fLockContributionThisFrame, fLuminanceDiff);
+    fLockStatus = FFXM_MIN16_F2(fLockStatus32);
+
+    // Outputs and clipping box are zero-initialised only when compiling as HLSL.
+#ifdef FFXM_HLSL
+    AccumulateOutputs results = (AccumulateOutputs)0;
+#else
+    AccumulateOutputs results;
+#endif
+
+    // Load upsampled input color
+#if FFXM_HALF
+#ifdef FFXM_HLSL
+    RectificationBoxMin16 clippingBox = (RectificationBoxMin16)0;
+#else
+    RectificationBoxMin16 clippingBox;
+#endif
+#else
+#ifdef FFXM_HLSL
+    RectificationBox clippingBox = (RectificationBox)0;
+#else
+    RectificationBox clippingBox;
+#endif
+#endif
+
+    // Must precede ComputeLumaInstabilityFactor() and the bIsNewSample branch below.
+    initIsNewSample(params);
+
+    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
+
+    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w, results);
+
+    // With luma instability disabled, results.fLumaHistory is not written by this function.
+#if FFXM_SHADER_QUALITY_OPT_DISABLE_LUMA_INSTABILITY
+    const FfxFloat32 fLumaInstabilityFactor = 0.0f;
+#else
+    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff, results);
+#endif
+
+    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
+
+    if (params.bIsNewSample) {
+        // No usable history: the upsampled colour becomes the output directly.
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+        fHistoryColor = InverseTonemap(fUpsampledColorAndWeight.xyz);
+#else
+        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
+#endif
+    }
+    else {
+        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);
+
+        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
+    }
+
+    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
+
+    // Get new temporal reactive factor
+    fTemporalReactiveFactor = FFXM_MIN16_F(ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor));
+
+#if !FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+    results.fColorAndWeight = FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor);
+#else
+    // Output the upscaled color and the temporal reactive factor if these are contained in separate textures
+    results.fUpscaledColor = fHistoryColor;
+    results.fTemporalReactive = fTemporalReactiveFactor;
+#endif
+    // Output final color when RCAS is disabled
+#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0
+    results.fColor = fHistoryColor;
+#endif
+    StoreNewLocks(iPxHrPos, 0);
+
+    return results;
+}
+
+#endif // FFXM_FSR2_ACCUMULATE_H
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta
new file mode 100644
index 0000000..77620fd
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 508ebc327e5948447894b9bb6f08f843
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      : Any
+    second:
+      enabled: 0
+      settings:
+        Exclude Android: 1
+        Exclude Editor: 1
+        Exclude GameCoreScarlett: 1
Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h new file mode 100644 index 0000000..7a0ba61 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -0,0 +1,1014 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "./fsr2/ffxm_fsr2_resources.h"
+
+// ffxm_core.h is included with the DXC "-Wambig-lit-shift" diagnostic suppressed.
+#if defined(FFXM_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "./ffxm_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFXM_GPU)
+
+// NOTE: this FFXM_GPU guard stays open for the declarations that follow.
+#if defined(FFXM_GPU)
+#ifndef FFXM_PREFER_WAVE64
+#define FFXM_PREFER_WAVE64
+#endif // FFXM_PREFER_WAVE64
+
+#if defined(FFXM_GPU)
+//#pragma warning(disable: 3205) // conversion from larger type to smaller
+#endif // #if defined(FFXM_GPU)
+
+// Helpers that turn a numeric bind index into an HLSL register() clause:
+// t# for SRVs, u# for UAVs and b# for constant buffers.
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFXM_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
+#define FFXM_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFXM_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+// Offset added to a constant buffer's bind index in its [[vk::binding]] attribute.
+#define SET_0_CB_START 2
+
+// Workaround
+// Element type used for RG UAVs: four components on GLES 3.2, two elsewhere.
+#if FFXM_SHADER_PLATFORM_GLES_3_2
+#define FFXM_UAV_RG_QUALIFIER FfxFloat32x4
+#else
+#define FFXM_UAV_RG_QUALIFIER FfxFloat32x2
+#endif
+
+#if defined(FSR2_BIND_CB_FSR2)
+    // Main FSR2 constant buffer. The member order defines the buffer layout and must
+    // match the data uploaded by the host, so members must not be reordered.
+    [[vk::binding(FSR2_BIND_CB_FSR2 + SET_0_CB_START, 0)]] cbuffer cbFSR2 : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2)
+    {
+        FfxInt32x2 iRenderSize;
+        FfxInt32x2 iMaxRenderSize;
+        FfxInt32x2 iDisplaySize;
+        FfxInt32x2 iInputColorResourceDimensions;
+        FfxInt32x2 iLumaMipDimensions;
+        FfxInt32 iLumaMipLevelToUse;
+        FfxInt32 iFrameIndex;
+
+        FfxFloat32x4 fDeviceToViewDepth;
+        FfxFloat32x2 fJitter;
+        FfxFloat32x2 fMotionVectorScale;
+        FfxFloat32x2 fDownscaleFactor;
+        FfxFloat32x2 fMotionVectorJitterCancellation;
+        FfxFloat32 fPreExposure;
+        FfxFloat32 fPreviousFramePreExposure;
+        FfxFloat32 fTanHalfFOV;
+        FfxFloat32 fJitterSequenceLength;
+        FfxFloat32 fDeltaTime;
+        FfxFloat32 fDynamicResChangeFactor;
+        FfxFloat32 fViewSpaceToMetersFactor;
+    };
+
+#define FFXM_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
+
+/* Define getter functions in the order they are defined in the CB! */
+// NOTE(review): Jitter() is defined before DeviceToViewSpaceTransformFactors() below,
+// whereas fDeviceToViewDepth precedes fJitter in the buffer.
+FfxInt32x2 RenderSize()
+{
+    return iRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+    return iMaxRenderSize;
+}
+
+FfxInt32x2 DisplaySize()
+{
+    return iDisplaySize;
+}
+
+FfxInt32x2 InputColorResourceDimensions()
+{
+    return iInputColorResourceDimensions;
+}
+
+FfxInt32x2 LumaMipDimensions()
+{
+    return iLumaMipDimensions;
+}
+
+FfxInt32 LumaMipLevelToUse()
+{
+    return iLumaMipLevelToUse;
+}
+
+FfxInt32 FrameIndex()
+{
+    return iFrameIndex;
+}
+
+FfxFloat32x2 Jitter()
+{
+    return fJitter;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+    return fDeviceToViewDepth;
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+    return fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+    return fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+    return fMotionVectorJitterCancellation;
+}
+
+FfxFloat32 PreExposure()
+{
+    return fPreExposure;
+}
+
+FfxFloat32 PreviousFramePreExposure()
+{
+    return fPreviousFramePreExposure;
+}
+
+FfxFloat32 TanHalfFoV()
+{
+    return fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+    return fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime()
+{
+    return fDeltaTime;
+}
+
+FfxFloat32 DynamicResChangeFactor()
+{
+    return fDynamicResChangeFactor;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+    return fViewSpaceToMetersFactor;
+}
+#endif // #if defined(FSR2_BIND_CB_FSR2)
+
+// Root signature for passes that only use cbFSR2: one UAV table, one SRV table,
+// the cbFSR2 values as root constants at b0, a point-clamp static sampler at s0
+// and a linear-clamp static sampler at s1.
+// The STRINGIFY/STR pair expands its argument before turning it into a string.
+#define FFXM_FSR2_ROOTSIG_STRINGIFY(p) FFXM_FSR2_ROOTSIG_STR(p)
+#define FFXM_FSR2_ROOTSIG_STR(p) #p
+#define FFXM_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+                                        "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                        "comparisonFunc = COMPARISON_NEVER, " \
+                                        "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                        "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                        "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                        "comparisonFunc = COMPARISON_NEVER, " \
+                                        "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFXM_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
+ +#define FFXM_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \ + "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] +#if defined(FFXM_FSR2_EMBED_ROOTSIG) +#define FFXM_FSR2_EMBED_ROOTSIG_CONTENT FFXM_FSR2_ROOTSIG +#define FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFXM_FSR2_CB2_ROOTSIG +#else +#define FFXM_FSR2_EMBED_ROOTSIG_CONTENT +#define FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT +#endif // #if FFXM_FSR2_EMBED_ROOTSIG + +#if defined(FSR2_BIND_CB_RCAS) +[[vk::binding(FSR2_BIND_CB_RCAS + SET_0_CB_START, 0)]] cbuffer cbRCAS : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) +{ + FfxUInt32x4 rcasConfig; +}; + +FfxUInt32x4 RCASConfig() +{ + return rcasConfig; +} +#endif // #if defined(FSR2_BIND_CB_RCAS) + + +#if defined(FSR2_BIND_CB_REACTIVE) +[[vk::binding(FSR2_BIND_CB_REACTIVE + SET_0_CB_START, 0)]] cbuffer cbGenerateReactive : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +}; + +FfxFloat32 
GenReactiveScale() +{ + return gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return gen_reactive_flags; +} +#endif // #if defined(FSR2_BIND_CB_REACTIVE) + +#if defined(FSR2_BIND_CB_SPD) +[[vk::binding(FSR2_BIND_CB_SPD + SET_0_CB_START, 0)]] cbuffer cbSPD : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +}; + +FfxUInt32 MipCount() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return renderSize; +} +#endif // #if defined(FSR2_BIND_CB_SPD) + +[[vk::binding(0, 0)]] SamplerState s_PointClamp : register(s0); +[[vk::binding(1, 0)]] SamplerState s_LinearClamp : register(s1); + + // SRVs + #if defined FSR2_BIND_SRV_INPUT_COLOR + [[vk::binding(FSR2_BIND_SRV_INPUT_COLOR, 1)]] Texture2D r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + #endif + #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY + [[vk::binding(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY, 1)]] Texture2D r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); + #endif + #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS + [[vk::binding(FSR2_BIND_SRV_INPUT_MOTION_VECTORS, 1)]] Texture2D r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_INPUT_DEPTH + [[vk::binding(FSR2_BIND_SRV_INPUT_DEPTH, 1)]] Texture2D r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); + #endif + #if defined FSR2_BIND_SRV_INPUT_EXPOSURE + [[vk::binding(FSR2_BIND_SRV_INPUT_EXPOSURE, 1)]] Texture2D r_input_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_AUTO_EXPOSURE + 
[[vk::binding(FSR2_BIND_SRV_AUTO_EXPOSURE, 1)]] Texture2D r_auto_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_REACTIVE_MASK + [[vk::binding(FSR2_BIND_SRV_REACTIVE_MASK, 1)]] Texture2D r_reactive_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + #endif + #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK + [[vk::binding(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK, 1)]] Texture2D r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + #endif + #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH + [[vk::binding(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH, 1)]] Texture2D r_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS + [[vk::binding(FSR2_BIND_SRV_DILATED_MOTION_VECTORS, 1)]] Texture2D r_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS + [[vk::binding(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS, 1)]] Texture2D r_previous_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_DILATED_DEPTH + [[vk::binding(FSR2_BIND_SRV_DILATED_DEPTH, 1)]] Texture2D r_dilatedDepth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + #endif + #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED + [[vk::binding(FSR2_BIND_SRV_INTERNAL_UPSCALED, 1)]] Texture2D r_internal_upscaled_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + #endif + #if defined FSR2_BIND_SRV_LOCK_STATUS + [[vk::binding(FSR2_BIND_SRV_LOCK_STATUS, 1)]] Texture2D r_lock_status : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + #endif + #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA + [[vk::binding(FSR2_BIND_SRV_LOCK_INPUT_LUMA, 1)]] Texture2D r_lock_input_luma : 
FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_SRV_NEW_LOCKS + [[vk::binding(FSR2_BIND_SRV_NEW_LOCKS, 1)]] Texture2D r_new_locks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); + #endif + #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR + [[vk::binding(FSR2_BIND_SRV_PREPARED_INPUT_COLOR, 1)]] Texture2D r_prepared_input_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + #endif + #if defined FSR2_BIND_SRV_LUMA_HISTORY + [[vk::binding(FSR2_BIND_SRV_LUMA_HISTORY, 1)]] Texture2D r_luma_history : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + #endif + #if defined FSR2_BIND_SRV_RCAS_INPUT + [[vk::binding(FSR2_BIND_SRV_RCAS_INPUT, 1)]] Texture2D r_rcas_input : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + #endif + #if defined FSR2_BIND_SRV_LANCZOS_LUT + [[vk::binding(FSR2_BIND_SRV_LANCZOS_LUT, 1)]] Texture2D r_lanczos_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + #endif + #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS + [[vk::binding(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS, 1)]] Texture2D r_imgMips : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); + #endif + #if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT + [[vk::binding(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT, 1)]] Texture2D r_upsample_maximum_bias_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + #endif + #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS + [[vk::binding(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS, 1)]] Texture2D r_dilated_reactive_masks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); + #endif + + #if defined FSR2_BIND_SRV_TEMPORAL_REACTIVE + [[vk::binding(FSR2_BIND_SRV_TEMPORAL_REACTIVE, 1)]] Texture2D r_internal_temporal_reactive : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TEMPORAL_REACTIVE); + #endif + + // UAV declarations + #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH + [[vk::binding(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, 1)]] RWTexture2D 
rw_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS + [[vk::binding(FSR2_BIND_UAV_DILATED_MOTION_VECTORS, 1)]] RWTexture2D rw_dilated_motion_vectors : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_UAV_DILATED_DEPTH + [[vk::binding(FSR2_BIND_UAV_DILATED_DEPTH, 1)]] RWTexture2D rw_dilatedDepth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + #endif + #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED + [[vk::binding(FSR2_BIND_UAV_INTERNAL_UPSCALED, 1)]] RWTexture2D rw_internal_upscaled_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + #endif + #if defined FSR2_BIND_UAV_LOCK_STATUS + [[vk::binding(FSR2_BIND_UAV_LOCK_STATUS, 1)]] RWTexture2D rw_lock_status : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + #endif + #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA + [[vk::binding(FSR2_BIND_UAV_LOCK_INPUT_LUMA, 1)]] RWTexture2D rw_lock_input_luma : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_UAV_NEW_LOCKS + [[vk::binding(FSR2_BIND_UAV_NEW_LOCKS, 1)]] RWTexture2D rw_new_locks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); + #endif + #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR + [[vk::binding(FSR2_BIND_UAV_PREPARED_INPUT_COLOR, 1)]] RWTexture2D rw_prepared_input_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + #endif + #if defined FSR2_BIND_UAV_LUMA_HISTORY + [[vk::binding(FSR2_BIND_UAV_LUMA_HISTORY, 1)]] RWTexture2D rw_luma_history : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + #endif + #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT + [[vk::binding(FSR2_BIND_UAV_UPSCALED_OUTPUT, 1)]] RWTexture2D rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + [[vk::binding(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, 1)]] globallycoherent RWTexture2D 
rw_img_mip_shading_change : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + [[vk::binding(FSR2_BIND_UAV_EXPOSURE_MIP_5, 1)]] globallycoherent RWTexture2D rw_img_mip_5 : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + #endif + #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS + [[vk::binding(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, 1)]] RWTexture2D rw_dilated_reactive_masks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE + [[vk::binding(FSR2_BIND_UAV_EXPOSURE, 1)]] RWTexture2D rw_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_AUTO_EXPOSURE + [[vk::binding(FSR2_BIND_UAV_AUTO_EXPOSURE, 1)]] RWTexture2D rw_auto_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + [[vk::binding(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, 1)]] globallycoherent RWTexture2D rw_spd_global_atomic : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + #endif + + #if defined FSR2_BIND_UAV_AUTOREACTIVE + [[vk::binding(FSR2_BIND_UAV_AUTOREACTIVE, 1)]] RWTexture2D rw_output_autoreactive : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); + #endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) +FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) +{ + return r_imgMips.mips[mipLevel][iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) +FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) +FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) +{ + return r_input_depth[iPxPos]; +} +/* + dd00 (-1,1) *------* dd10 (0,-1) + | | + | | + dd01 (-1,0) *------* dd11 (0,0) +*/ +void GatherInputDepthRQuad(FfxFloat32x2 fUV, + FFXM_PARAMETER_INOUT FfxFloat32 dd00, + FFXM_PARAMETER_INOUT FfxFloat32 dd10, + 
FFXM_PARAMETER_INOUT FfxFloat32 dd01,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd11)
+{
+    FfxFloat32x4 rrrr = r_input_depth.GatherRed(s_PointClamp, fUV); // red channel of the four quad texels in a single fetch
+    dd01 = FfxFloat32(rrrr.x);
+    dd11 = FfxFloat32(rrrr.y);
+    dd10 = FfxFloat32(rrrr.z);
+    dd00 = FfxFloat32(rrrr.w);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) // input depth at fUV, mip 0, through s_LinearClamp; only .x is returned
+{
+    return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) // unfiltered texel read of the reactive mask at iPxPos
+{
+    return r_reactive_mask[iPxPos];
+}
+/* Quad layout: offsets are relative to col11; GatherRed .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherReactiveRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    FFXM_MIN16_F4 rrrr = r_reactive_mask.GatherRed(s_PointClamp, fUV); // all four outputs are overwritten below
+    col01 = FFXM_MIN16_F(rrrr.x);
+    col11 = FFXM_MIN16_F(rrrr.y);
+    col10 = FFXM_MIN16_F(rrrr.z);
+    col00 = FFXM_MIN16_F(rrrr.w);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) // unfiltered texel read of the T&C mask at iPxPos
+{
+    return r_transparency_and_composition_mask[iPxPos];
+}
+/* Quad layout: offsets are relative to col11; GatherRed .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherTransparencyAndCompositionMaskRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    FFXM_MIN16_F4 rrrr = r_transparency_and_composition_mask.GatherRed(s_PointClamp, fUV); // all four outputs are overwritten below
+    col01 = FFXM_MIN16_F(rrrr.x);
+    col11 = FFXM_MIN16_F(rrrr.y);
+    col10 = FFXM_MIN16_F(rrrr.z);
+    col00 = FFXM_MIN16_F(rrrr.w);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+FFXM_MIN16_F3 LoadInputColor(FfxUInt32x2 iPxPos) // unfiltered texel read of the jittered input colour; returns .rgb
+{
+    return r_input_color_jittered[iPxPos].rgb;
+}
+/*
+    col00
(-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherInputColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col11)
+{
+    FFXM_MIN16_F4 rrrr = r_input_color_jittered.GatherRed(s_PointClamp, fUV); // one gather per channel, same footprint each time
+    FFXM_MIN16_F4 gggg = r_input_color_jittered.GatherGreen(s_PointClamp, fUV);
+    FFXM_MIN16_F4 bbbb = r_input_color_jittered.GatherBlue(s_PointClamp, fUV);
+    col01 = FFXM_MIN16_F3(rrrr.x, gggg.x, bbbb.x); // component i of each gather belongs to the same texel
+    col11 = FFXM_MIN16_F3(rrrr.y, gggg.y, bbbb.y);
+    col10 = FFXM_MIN16_F3(rrrr.z, gggg.z, bbbb.z);
+    col00 = FFXM_MIN16_F3(rrrr.w, gggg.w, bbbb.w);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+FFXM_MIN16_F3 SampleInputColor(FfxFloat32x2 fUV) // jittered input colour at fUV, mip 0, through s_LinearClamp; returns .rgb
+{
+    return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+FFXM_MIN16_F3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) // unfiltered texel read of the prepared input colour; returns .xyz
+{
+    return r_prepared_input_color[iPxPos].xyz;
+}
+FFXM_MIN16_F3 SamplePreparedInputColor(FfxFloat32x2 fUV) // note: uses s_PointClamp, unlike SampleInputColor which uses s_LinearClamp
+{
+    return r_prepared_input_color.SampleLevel(s_PointClamp, fUV, 0).xyz;
+}
+/* Quad layout: offsets are relative to col11; gather .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherPreparedInputColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col11)
+{
+    FFXM_MIN16_F4 rrrr = r_prepared_input_color.GatherRed(s_PointClamp, fUV); // one gather per channel, same footprint each time
+    FFXM_MIN16_F4 gggg = r_prepared_input_color.GatherGreen(s_PointClamp, fUV);
+    FFXM_MIN16_F4 bbbb = r_prepared_input_color.GatherBlue(s_PointClamp, fUV);
+    col01 = FFXM_MIN16_F3(rrrr.x, gggg.x, bbbb.x); // component i of each gather belongs to the same texel
+    col11 = FFXM_MIN16_F3(rrrr.y, gggg.y, bbbb.y);
+    col10 = FFXM_MIN16_F3(rrrr.z, gggg.z, bbbb.z);
+    col00 = FFXM_MIN16_F3(rrrr.w, gggg.w, bbbb.w);
+}
+#endif
+
+#if
defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
+FFXM_MIN16_F2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) // texel .xy scaled by MotionVectorScale(), jitter removed when the option is set
+{
+    FFXM_MIN16_F2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy;
+
+    FFXM_MIN16_F2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
+
+#if FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+    fUvMotionVector -= MotionVectorJitterCancellation();
+#endif
+
+    return fUvMotionVector;
+}
+/* Quad layout: offsets are relative to col11; gather .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherInputMotionVectorRGQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col11)
+{
+    FFXM_MIN16_F4 rrrr = r_input_motion_vectors.GatherRed(s_PointClamp, fUV); // red and green gathers share one footprint
+    FFXM_MIN16_F4 gggg = r_input_motion_vectors.GatherGreen(s_PointClamp, fUV);
+    col01 = FFXM_MIN16_F2(rrrr.x, gggg.x) * MotionVectorScale(); // same scale and jitter handling as LoadInputMotionVector, per texel
+    col11 = FFXM_MIN16_F2(rrrr.y, gggg.y) * MotionVectorScale();
+    col10 = FFXM_MIN16_F2(rrrr.z, gggg.z) * MotionVectorScale();
+    col00 = FFXM_MIN16_F2(rrrr.w, gggg.w) * MotionVectorScale();
+#if FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+    col01 -= MotionVectorJitterCancellation();
+    col11 -= MotionVectorJitterCancellation();
+    col10 -= MotionVectorJitterCancellation();
+    col00 -= MotionVectorJitterCancellation();
+#endif
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
+FFXM_MIN16_F4 LoadHistory(FfxUInt32x2 iPxHistory) // unfiltered texel read of the internal upscaled colour
+{
+    return r_internal_upscaled_color[iPxHistory];
+}
+FFXM_MIN16_F4 SampleUpscaledHistory(FfxFloat32x2 fUV) // internal upscaled colour at fUV, mip 0, through s_LinearClamp
+{
+    return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0);
+}
+/* Quad layout: offsets are relative to col11; gather .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherHistoryColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col01,
+    FFXM_PARAMETER_INOUT
FFXM_MIN16_F4 col11) +{ + FFXM_MIN16_F4 rrrr = r_internal_upscaled_color.GatherRed(s_PointClamp, fUV); + FFXM_MIN16_F4 gggg = r_internal_upscaled_color.GatherGreen(s_PointClamp, fUV); + FFXM_MIN16_F4 bbbb = r_internal_upscaled_color.GatherBlue(s_PointClamp, fUV); + col01 = FFXM_MIN16_F4(rrrr.x, gggg.x, bbbb.x, 0.0f); + col11 = FFXM_MIN16_F4(rrrr.y, gggg.y, bbbb.y, 0.0f); + col10 = FFXM_MIN16_F4(rrrr.z, gggg.z, bbbb.z, 0.0f); + col00 = FFXM_MIN16_F4(rrrr.w, gggg.w, bbbb.w, 0.0f); +} +#endif + +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) +void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) +{ + rw_luma_history[iPxPos] = fLumaHistory; +} +#endif + +#if defined(FSR2_BIND_SRV_LUMA_HISTORY) +FFXM_MIN16_F4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +FFXM_MIN16_F4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_RCAS_INPUT) + return r_rcas_input.Load(FfxInt32x3(iPxPos, 0)); +#else + return 0.0; +#endif +} + +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) +void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + rw_internal_upscaled_color[iPxHistory] = fHistory; +} +#endif + +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) +void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + rw_internal_upscaled_color[iPxPos] = fColorAndWeight; +} +#endif + +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) +void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); +} +#endif + +//LOCK_LIFETIME_REMAINING == 0 +//Should make LockInitialLifetime() return a const 1.0f later +#if defined(FSR2_BIND_SRV_LOCK_STATUS) +FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) +{ + return r_lock_status[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_LOCK_STATUS) +void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) +{ + rw_lock_status[iPxPos] = fLockStatus; +} +#endif + +#if 
defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
+FFXM_MIN16_F LoadLockInputLuma(FfxUInt32x2 iPxPos) // unfiltered texel read of the lock input luma
+{
+    return r_lock_input_luma[iPxPos];
+}
+/* Quad layout: offsets are relative to col11; GatherRed .x/.y/.z/.w are written to col01/col11/col10/col00.
+    col00 (-1,-1) *------* col10 (0,-1)
+                  |      |
+                  |      |
+    col01 (-1, 0) *------* col11 (0, 0)
+*/
+void GatherLockInputLumaRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    FFXM_MIN16_F4 rrrr = r_lock_input_luma.GatherRed(s_PointClamp, fUV); // all four outputs are overwritten below
+    col01 = FFXM_MIN16_F(rrrr.x);
+    col11 = FFXM_MIN16_F(rrrr.y);
+    col10 = FFXM_MIN16_F(rrrr.z);
+    col00 = FFXM_MIN16_F(rrrr.w);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
+FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) // reads the new-locks texture through its SRV binding
+{
+    return r_new_locks[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+FFXM_MIN16_F LoadRwNewLocks(FfxUInt32x2 iPxPos) // reads the new-locks texture through its UAV binding
+{
+    return rw_new_locks[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) // plain (non-atomic) write of newLock at iPxPos
+{
+    rw_new_locks[iPxPos] = newLock;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) // returns the .w channel of the prepared input colour, linearly filtered at mip 0
+{
+    return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+FFXM_MIN16_F2 SampleLockStatus(FfxFloat32x2 fUV) // lock status at fUV, mip 0, through s_LinearClamp
+{
+    FFXM_MIN16_F2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
+    return fLockStatus;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) // texel bits are reinterpreted as float via asfloat, not numerically converted
+{
+    return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]);
+}
+/* Quad layout: offsets are relative to d11; GatherRed .x/.y/.z/.w are written to d01/d11/d10/d00.
+    d00 (-1,-1) *------* d10 (0,-1)
+                |      |
+                |      |
+    d01 (-1, 0) *------* d11 (0, 0)
+*/
+void GatherReconstructedPreviousDepthRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FfxFloat32 d00,
+    FFXM_PARAMETER_INOUT FfxFloat32 d10,
+    FFXM_PARAMETER_INOUT FfxFloat32 d01,
+    FFXM_PARAMETER_INOUT FfxFloat32 d11)
+{
+    FfxUInt32x4 rrrr =
r_reconstructed_previous_nearest_depth.GatherRed(s_PointClamp, fUV);
+    d01 = FfxFloat32(asfloat(rrrr.x)); // each gathered uint is bit-reinterpreted as a float depth
+    d11 = FfxFloat32(asfloat(rrrr.y));
+    d10 = FfxFloat32(asfloat(rrrr.z));
+    d00 = FfxFloat32(asfloat(rrrr.w));
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) // atomically merges fDepth into the texel at iPxSample
+{
+    FfxUInt32 uDepth = asuint(fDepth); // the atomics below compare the raw bit pattern as an unsigned integer
+
+    #if FFXM_FSR2_OPTION_INVERTED_DEPTH
+        InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth);
+    #else
+        InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth
+    #endif
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) // plain overwrite with a raw uint; no atomic, unlike StoreReconstructedDepth
+{
+    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
+FFXM_MIN16_F2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) // unfiltered texel read; returns .xy
+{
+    return r_dilated_motion_vectors[iPxInput].xy;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
+FFXM_MIN16_F2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) // unfiltered texel read; returns .xy
+{
+    return r_previous_dilated_motion_vectors[iPxInput].xy;
+}
+
+FFXM_MIN16_F2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) // same texture at uv, mip 0, through s_LinearClamp; returns .xy
+{
+    return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
+FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) // unfiltered texel read of the dilated depth
+{
+    return r_dilatedDepth[iPxInput];
+}
+/* Quad layout: offsets are relative to dd11; GatherRed .x/.y/.z/.w are written to dd01/dd11/dd10/dd00.
+    dd00 (-1,-1) *------* dd10 (0,-1)
+                 |      |
+                 |      |
+    dd01 (-1, 0) *------* dd11 (0, 0)
+*/
+void GatherDilatedDepthRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd00,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd10,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd01,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd11)
+{
+    FfxFloat32x4 rrrr = r_dilatedDepth.GatherRed(s_PointClamp, fUV);
+    dd01 = FfxFloat32(rrrr.x);
+    dd11 = FfxFloat32(rrrr.y);
+    dd10 =
FfxFloat32(rrrr.z); + dd00 = FfxFloat32(rrrr.w); +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} + +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) +FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) +{ + // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. + return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); +} +#endif + +#if defined(FSR2_BIND_SRV_TEMPORAL_REACTIVE) +FfxFloat32 SampleTemporalReactive(FfxFloat32x2 fUV) +{ + return r_internal_temporal_reactive.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) +FFXM_MIN16_F2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) +FFXM_MIN16_F2 LoadDilatedReactiveMasks(FFXM_PARAMETER_IN FfxUInt32x2 iPxPos) +{ + return r_dilated_reactive_masks[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) +FfxFloat32x3 LoadOpaqueOnly(FFXM_PARAMETER_IN FFXM_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[iPxPos].xyz; +} +#endif + +FfxFloat32x2 SPD_LoadExposureBuffer() +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + return rw_auto_exposure[FfxInt32x2(0, 0)].rg; +#else + return FfxFloat32x2(0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + 
+void SPD_SetExposureBuffer(FfxFloat32x2 value) // writes the exposure pair to texel (0,0) of rw_auto_exposure; compiles to a no-op when that UAV is not bound
+{
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+#if FFXM_SHADER_PLATFORM_GLES_3_2
+    rw_auto_exposure[FfxInt32x2(0, 0)] = FfxFloat32x4(value, 0.0f, 0.0f); // padded to four floats on GLES 3.2; must stay floating-point, an integer vector here would truncate the exposure (SPD_LoadExposureBuffer reads it back as FfxFloat32x2)
+#else
+    rw_auto_exposure[FfxInt32x2(0, 0)] = value;
+#endif
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) // mip-5 value in .x with yzw = 0; all zeros when the mip-5 UAV is not bound
+{
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+    return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0);
+#else
+    return FfxFloat32x4(0.f, 0.f, 0.f, 0.f);
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+}
+
+void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) // stores value only for the two slices that have a bound UAV; other slices are dropped
+{
+    switch (slice)
+    {
+    case FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        rw_img_mip_shading_change[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        break;
+    case 5:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+        rw_img_mip_5[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+        break;
+    default:
+
+        // avoid flattened side effect: the texel is rewritten with its own value, so the default path still performs a store without changing data
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+        rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos];
+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
+        rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos];
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+        break;
+    }
+}
+
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) // adds 1 to the global counter; spdCounter receives the pre-increment value, or is left untouched when the UAV is not bound
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+    InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter);
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+void SPD_ResetAtomicCounter() // plain (non-atomic) store of 0 to the global counter texel
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+    rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0;
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+#endif // #if defined(FFXM_GPU)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta new file mode 100644 index 0000000..fc47d4c --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 2dc46407945236c43a0c460b616f4204 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h new file mode 100644 index 0000000..4a13e6f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h @@ -0,0 +1,595 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. 
+// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#if !defined(FFXM_FSR2_COMMON_H) +#define FFXM_FSR2_COMMON_H + +#if defined(FFXM_CPU) || defined(FFXM_GPU) +//Locks +#define LOCK_LIFETIME_REMAINING 0 +#define LOCK_TEMPORAL_LUMA 1 +#endif // #if defined(FFXM_CPU) || defined(FFXM_GPU) + +#if defined(FFXM_GPU) +FFXM_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; +FFXM_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; +FFXM_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; +FFXM_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; +FFXM_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; +FFXM_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; + +// treat vector truncation warnings as errors +#pragma warning(error: 3206) + +// suppress warnings +#pragma warning(disable: 3205) // conversion from larger type to smaller +#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative + +// Reconstructed depth usage +FFXM_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; + +// Accumulation +#if !FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP +FFXM_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 9.0f; +#else +FFXM_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 5.0f; +#endif +FFXM_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; +FFXM_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFXM_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; + +// Auto exposure +FFXM_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; + +// Optimizations defines +#ifndef FFXM_OPT_USE_GATHER_OPS +#define FFXM_OPT_USE_GATHER_OPS 0 +#endif + +struct AccumulationPassCommonParams +{ + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 fHrVelocity; + FFXM_MIN16_F fDepthClipFactor; + FFXM_MIN16_F 
fDilatedReactiveFactor; + FFXM_MIN16_F fAccumulationMask; + + //FfxBoolean bIsResetFrame; + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; +}; + +struct LockState +{ + FfxBoolean NewLock; //Set for both unique new and re-locked new + FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) +}; + +void InitializeNewLockSample(FFXM_PARAMETER_OUT FfxFloat32x2 fLockStatus) +{ + fLockStatus = FfxFloat32x2(0, 0); +} + +#if FFXM_HALF +void InitializeNewLockSample(FFXM_PARAMETER_OUT FFXM_MIN16_F2 fLockStatus) +{ + fLockStatus = FFXM_MIN16_F2(0, 0); +} +#endif + + +void KillLock(FFXM_PARAMETER_INOUT FfxFloat32x2 fLockStatus) +{ + fLockStatus[LOCK_LIFETIME_REMAINING] = 0; +} + +#if FFXM_HALF +void KillLock(FFXM_PARAMETER_INOUT FFXM_MIN16_F2 fLockStatus) +{ + fLockStatus[LOCK_LIFETIME_REMAINING] = FFXM_MIN16_F(0); +} +#endif + +struct RectificationBox +{ + FfxFloat32x3 boxCenter; + FfxFloat32x3 boxVec; + FfxFloat32x3 aabbMin; + FfxFloat32x3 aabbMax; + FfxFloat32 fBoxCenterWeight; +}; +#if FFXM_HALF +struct RectificationBoxMin16 +{ + FFXM_MIN16_F3 boxCenter; + FFXM_MIN16_F3 boxVec; + FFXM_MIN16_F3 aabbMin; + FFXM_MIN16_F3 aabbMax; + FFXM_MIN16_F fBoxCenterWeight; +}; +#endif + +void RectificationBoxReset(FFXM_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = FfxFloat32(0); + + rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); + rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); + rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); + rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); +} +#if FFXM_HALF +void RectificationBoxReset(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) +{ + rectificationBox.fBoxCenterWeight = FFXM_MIN16_F(0); + + rectificationBox.boxCenter = FFXM_MIN16_F3(0, 0, 0); + rectificationBox.boxVec = FFXM_MIN16_F3(0, 0, 0); + rectificationBox.aabbMin = FFXM_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, 
FSR2_FP16_MAX); + rectificationBox.aabbMax = -FFXM_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); +} +#endif + +void RectificationBoxAddInitialSample(FFXM_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFXM_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } +} +#if FFXM_HALF +void RectificationBoxAddInitialSample(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFXM_MIN16_F3 colorSample, const FFXM_MIN16_F fSampleWeight) +{ + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + + FFXM_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFXM_MIN16_F3 colorSample, const FFXM_MIN16_F fSampleWeight) +{ + if (bInitialSample) { + 
RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FFXM_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } +} +#endif + +void RectificationBoxComputeVarianceBoxData(FFXM_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; +} +#if FFXM_HALF +void RectificationBoxComputeVarianceBoxData(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFXM_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFXM_MIN16_F(1.f)); + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FFXM_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; +} +#endif + +FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) +{ + return (all(FFXM_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); +} +#if FFXM_HALF +FFXM_MIN16_F3 SafeRcp3(FFXM_MIN16_F3 v) +{ + return (all(FFXM_NOT_EQUAL(v, FFXM_MIN16_F3(0, 0, 0)))) ? 
(FFXM_MIN16_F3(1, 1, 1) / v) : FFXM_MIN16_F3(0, 0, 0); +} +#endif + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) +{ + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : 0; +} + +#if FFXM_HALF +FFXM_MIN16_F MinDividedByMax(const FFXM_MIN16_F v0, const FFXM_MIN16_F v1) +{ + const FFXM_MIN16_F m = ffxMax(v0, v1); + return m != FFXM_MIN16_F(0) ? ffxMin(v0, v1) / m : FFXM_MIN16_F(0); +} +#endif + +FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) +{ + FfxFloat32x3 fRgb; + + fRgb = FfxFloat32x3( + fYCoCg.x + fYCoCg.y - fYCoCg.z, + fYCoCg.x + fYCoCg.z, + fYCoCg.x - fYCoCg.y - fYCoCg.z); + + return fRgb; +} +#if FFXM_HALF +FFXM_MIN16_F3 YCoCgToRGB(FFXM_MIN16_F3 fYCoCg) +{ + FFXM_MIN16_F3 fRgb; + + fRgb = FFXM_MIN16_F3( + fYCoCg.x + fYCoCg.y - fYCoCg.z, + fYCoCg.x + fYCoCg.z, + fYCoCg.x - fYCoCg.y - fYCoCg.z); + + return fRgb; +} +#endif + +FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) +{ + FfxFloat32x3 fYCoCg; + + fYCoCg = FfxFloat32x3( + 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, + 0.5f * fRgb.r - 0.5f * fRgb.b, + -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); + + return fYCoCg; +} +#if FFXM_HALF +FFXM_MIN16_F3 RGBToYCoCg(FFXM_MIN16_F3 fRgb) +{ + FFXM_MIN16_F3 fYCoCg; + + fYCoCg = FFXM_MIN16_F3( + 0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b, + 0.5 * fRgb.r - 0.5 * fRgb.b, + -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); + + return fYCoCg; +} +#endif + +FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) +{ + return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); +} +#if FFXM_HALF +FFXM_MIN16_F RGBToLuma(FFXM_MIN16_F3 fLinearRgb) +{ + return dot(fLinearRgb, FFXM_MIN16_F3(0.2126f, 0.7152f, 0.0722f)); +} +#endif + +FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) +{ + FfxFloat32 fLuminance = RGBToLuma(fLinearRgb); + + FfxFloat32 fPercievedLuminance = 0; + if (fLuminance <= 216.0f / 24389.0f) { + fPercievedLuminance = fLuminance * (24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, 1.0f / 
3.0f) * 116.0f - 16.0f; + } + + return fPercievedLuminance * 0.01f; +} +#if FFXM_HALF +FFXM_MIN16_F RGBToPerceivedLuma(FFXM_MIN16_F3 fLinearRgb) +{ + FFXM_MIN16_F fLuminance = RGBToLuma(fLinearRgb); + + FFXM_MIN16_F fPercievedLuminance = FFXM_MIN16_F(0); + if (fLuminance <= FFXM_MIN16_F(216.0f / 24389.0f)) { + fPercievedLuminance = fLuminance * FFXM_MIN16_F(24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, FFXM_MIN16_F(1.0f / 3.0f)) * FFXM_MIN16_F(116.0f) - FFXM_MIN16_F(16.0f); + } + + return fPercievedLuminance * FFXM_MIN16_F(0.01f); +} +#endif + +FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; +} + +FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) +{ + return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} + +#if FFXM_HALF +FFXM_MIN16_F3 Tonemap(FFXM_MIN16_F3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(FFXM_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFXM_MIN16_F(1.f)).xxx; +} + +FFXM_MIN16_F3 InverseTonemap(FFXM_MIN16_F3 fRgb) +{ + return fRgb / ffxMax(FFXM_MIN16_F(FSR2_TONEMAP_EPSILON), FFXM_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} +#endif + +FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; + + // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); +} +#if FFXM_HALF +FFXM_MIN16_I2 ClampLoad(FFXM_MIN16_I2 iPxSample, FFXM_MIN16_I2 iPxOffset, FFXM_MIN16_I2 iTextureSize) +{ + FFXM_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? 
ffxMax(result.x, FFXM_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFXM_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFXM_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFXM_MIN16_I(1)) : result.y; + return result; + + // return ffxMed3Half(iPxSample + iPxOffset, FFXM_MIN16_I2(0, 0), iTextureSize - FFXM_MIN16_I2(1, 1)); +} +#endif + +FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) +{ + const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; + const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); + const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); + + return fClampedUv; +} + +FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) +{ + return all(FFXM_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); +} +#if FFXM_HALF +FfxBoolean IsOnScreen(FFXM_MIN16_I2 pos, FFXM_MIN16_I2 size) +{ + return all(FFXM_LESS_THAN(FFXM_MIN16_U2(pos), FFXM_MIN16_U2(size))); +} +#endif + +FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) +{ + Lavg = exp(Lavg); + + const FfxFloat32 S = 100.0f; //ISO arithmetic speed + const FfxFloat32 K = 12.5f; + FfxFloat32 ExposureISO100 = log2((Lavg * S) / K); + + const FfxFloat32 q = 0.65f; + FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); + + return 1 / Lmax; +} +#if FFXM_HALF +FFXM_MIN16_F ComputeAutoExposureFromLavg(FFXM_MIN16_F Lavg) +{ + Lavg = exp(Lavg); + + const FFXM_MIN16_F S = FFXM_MIN16_F(100.0f); //ISO arithmetic speed + const FFXM_MIN16_F K = FFXM_MIN16_F(12.5f); + const FFXM_MIN16_F ExposureISO100 = log2((Lavg * S) / K); + + const FFXM_MIN16_F q = FFXM_MIN16_F(0.65f); + const FFXM_MIN16_F Lmax = (FFXM_MIN16_F(78.0f) / (q * S)) * ffxPow(FFXM_MIN16_F(2.0f), ExposureISO100); + + return FFXM_MIN16_F(1) / Lmax; +} +#endif + +FfxInt32x2 
ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) +{ + FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); + FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); + return iPxHrPos; +} +#if FFXM_HALF +FFXM_MIN16_I2 ComputeHrPosFromLrPos(FFXM_MIN16_I2 iPxLrPos) +{ + FFXM_MIN16_F2 fSrcJitteredPos = FFXM_MIN16_F2(iPxLrPos) + FFXM_MIN16_F(0.5f) - FFXM_MIN16_F2(Jitter()); + FFXM_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFXM_MIN16_F2(RenderSize())) * FFXM_MIN16_F2(DisplaySize()); + FFXM_MIN16_I2 iPxHrPos = FFXM_MIN16_I2(floor(fLrPosInHr)); + return iPxHrPos; +} +#endif + +FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) +{ + return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); +} + +FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + // fDeviceToViewDepth details found in ffx_fsr2.cpp + return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); +} + +FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) +{ + return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); + + const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); + const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; + const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; + + return FfxFloat32x3(X, Y, Z); +} + +FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32 GetMaxDistanceInMeters() 
+{ +#if FFXM_FSR2_OPTION_INVERTED_DEPTH + return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); +#else + return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); +#endif +} + +FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + fRgb /= fPreExposure; + fRgb *= fExposure; + + fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX); + + return fRgb; +} + +FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) +{ + fRgb /= fExposure; + fRgb *= PreExposure(); + + return fRgb; +} + +#if FFXM_HALF +FfxFloat16x3 PrepareRgb(FfxFloat16x3 fRgb, FfxFloat16 fExposure, FfxFloat16 fPreExposure) +{ + fRgb /= fPreExposure; + fRgb *= fExposure; + + fRgb = clamp(fRgb, FfxFloat16(0.0f), FfxFloat16(FSR2_FP16_MAX)); + + return fRgb; +} + +FfxFloat16x3 UnprepareRgb(FfxFloat16x3 fRgb, FfxFloat16 fExposure) +{ + fRgb /= fExposure; + fRgb *= FfxFloat16(PreExposure()); + + return fRgb; +} +#endif + +struct BilinearSamplingData +{ + FfxInt32x2 iOffsets[4]; + FfxFloat32 fWeights[4]; + FfxInt32x2 iBasePos; + FfxFloat32x2 fQuadCenterUv; +}; + +BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) +{ + BilinearSamplingData data; + + FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); + data.iBasePos = FfxInt32x2(floor(fPxSample)); + data.fQuadCenterUv = fPxSample / FfxFloat32x2(iSize); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + data.iOffsets[0] = FfxInt32x2(0, 0); + data.iOffsets[1] = FfxInt32x2(1, 0); + data.iOffsets[2] = FfxInt32x2(0, 1); + data.iOffsets[3] = FfxInt32x2(1, 1); + + data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); + data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); + + return data; +} + +struct PlaneData +{ + FfxFloat32x3 fNormal; + FfxFloat32 fDistanceFromOrigin; +}; + +PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) +{ + PlaneData plane; + + 
FfxFloat32x3 v0 = fP0 - fP1; + FfxFloat32x3 v1 = fP0 - fP2; + plane.fNormal = normalize(cross(v0, v1)); + plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); + + return plane; +} + +FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) +{ + return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); +} + +#endif // #if defined(FFXM_GPU) + +#endif //!defined(FFXM_FSR2_COMMON_H) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta new file mode 100644 index 0000000..723aa76 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: ecee34f12256cf741857fcb5696b0996 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff 
--git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h new file mode 100644 index 0000000..ffef258 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h @@ -0,0 +1,211 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +FFXM_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFXM_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFXM_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFXM_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFXM_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) +{ + FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); + fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); + FfxFloat32x3 fRgb = SampleInputColor(fUv); + + fRgb /= PreExposure(); + + //compute log luma + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); + + // Make sure out of screen pixels contribute no value to the end result + const FfxFloat32 result = all(FFXM_LESS_THAN(tex, RenderSize())) ? 
fLogLuma : 0.0f; + + return FfxFloat32x4(result, 0, 0, 0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return SPD_LoadMipmap5(tex); +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + SPD_SetMipmap(pix, index, outValue.r); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFXM_EQUAL(pix, FfxInt32x2(0, 0)))) + { + FfxFloat32 prev = SPD_LoadExposureBuffer().y; + FfxFloat32 result = outValue.r; + + if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + FfxFloat32 rate = 1.0f; + result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + } + FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); + SPD_SetExposureBuffer(spdOutput); + } + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} +#endif + +// define fetch and store functions Packed +#if FFXM_HALF + +FFXM_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFXM_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + FfxFloat16x2 fUv = FfxFloat16x2((tex + 0.5f + Jitter()) / RenderSize()); + fUv = FfxFloat16x2(ClampUv(fUv, RenderSize(), InputColorResourceDimensions())); + FfxFloat16x3 fRgb = FfxFloat16x3(SampleInputColor(fUv)); + + fRgb /= FfxFloat16(PreExposure()); + + //compute log luma + const FfxFloat16 fLogLuma = 
FfxFloat16(log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)))); + + // Make sure out of screen pixels contribute no value to the end result + const FfxFloat16 result = all(FFXM_LESS_THAN(tex, RenderSize())) ? fLogLuma : FfxFloat16(0.0f); + + return FfxFloat16x4(result, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(SPD_LoadMipmap5(p)); +} + +void SpdStoreH(FfxInt32x2 pix, FfxFloat16x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + SPD_SetMipmap(pix, index, outValue.r); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFXM_EQUAL(pix, FfxInt16x2(0, 0)))) + { + FfxFloat16 result = outValue.r; + + // If running with GLES 3.2, remove the smooth exposure transition. +#if !FFXM_SHADER_PLATFORM_GLES_3_2 + FfxFloat16 prev = FfxFloat16(SPD_LoadExposureBuffer().y); + if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + FfxFloat16 rate = FfxFloat16(1.0f); + result = FfxFloat16(prev + (result - prev) * (1 - exp(-DeltaTime() * rate))); + } +#endif + FfxFloat16x2 spdOutput = FfxFloat16x2(ComputeAutoExposureFromLavg(result), result); + SPD_SetExposureBuffer(spdOutput); + } + } +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "./spd/ffxm_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFXM_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + 
FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta new file mode 100644 index 0000000..7ea9408 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 53658e1078243f24aa98041b58bf721d +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h new file mode 100644 index 0000000..2ef4152 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h @@ -0,0 +1,349 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#ifndef FFXM_FSR2_DEPTH_CLIP_H +#define FFXM_FSR2_DEPTH_CLIP_H + +// Can cause some temporal instability +#define OPT_PREFETCH_PREVDEPTH_WITH_GATHER 0 + +struct DepthClipOutputs +{ + FfxFloat32x2 fDilatedReactiveMasks; + FfxFloat32x4 fTonemapped; +}; + +FFXM_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; + +FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) +{ + FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); + + FfxFloat32 fDilatedSum = 0.0f; + FfxFloat32 fDepth = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + + +#if OPT_PREFETCH_PREVDEPTH_WITH_GATHER + FfxFloat32 fDepthSamples[4]; + GatherReconstructedPreviousDepthRQuad(bilinearInfo.fQuadCenterUv, + fDepthSamples[0], fDepthSamples[1], fDepthSamples[2], fDepthSamples[3]); +#endif + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) + { + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; + + if (IsOnScreen(iSamplePos, RenderSize())) + { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) + { +#if OPT_PREFETCH_PREVDEPTH_WITH_GATHER + const FfxFloat32 fPrevDepthSample = fDepthSamples[iSampleIndex]; +#else + const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); +#endif + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); + const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + + if (fDepthDiff > 0.0f) { + +#if FFXM_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); +#else + const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); +#endif + + const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), 
fPlaneDepth); + const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); + + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 Kfov = length(fCorner) / length(fCenter); + const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold; + + const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); + const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); + fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; +} + +FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) +{ + FfxFloat32 minconvergence = 1.0f; + + FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); + FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); + FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); + + const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; + + + const FfxFloat32x2 fMVSize = FfxFloat32x2(iPxInputMotionVectorSize); + FfxFloat32x2 fPxBaseUv = FfxFloat32x2(iPxPos) / fMVSize; + FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fMVSize; + + FFXM_MIN16_F2 fMotionVectorSamples[9]; + FFXM_MIN16_F2 fTmpDummy = FFXM_MIN16_F2(0.0f, 0.0f); + GatherInputMotionVectorRGQuad(fPxBaseUv, + fMotionVectorSamples[0], fMotionVectorSamples[1], + fMotionVectorSamples[3], fMotionVectorSamples[4]); + GatherInputMotionVectorRGQuad(fUnitUv + fPxBaseUv, + fTmpDummy, fMotionVectorSamples[5], + fMotionVectorSamples[7], fMotionVectorSamples[8]); + fMotionVectorSamples[2] = LoadInputMotionVector(iPxPos + FfxInt32x2(1, -1)); + fMotionVectorSamples[6] 
= LoadInputMotionVector(iPxPos + FfxInt32x2(-1, 1)); + + if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { + for (FfxInt32 y = -1; y <= 1; ++y) + { + for (FfxInt32 x = -1; x <= 1; ++x) + { + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + + FfxFloat32x2 fMotionVector = fMotionVectorSamples[sampleIdx]; //LoadInputMotionVector(sp); + FfxFloat32 fVelocityUv = length(fMotionVector); + + fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); + } + } + } + + return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); +} + +FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); + FfxFloat32 fDepthMax = 0.0f; + FfxFloat32 fDepthMin = fMaxDistInMeters; + + FfxInt32 iMaxDistFound = 0; + + FfxInt32x2 iRenderSize = RenderSize(); + const FfxFloat32x2 fRenderSize = FfxFloat32x2(iRenderSize); + FfxFloat32x2 fPxPosBase = FfxFloat32x2(iPxPos) / fRenderSize; + FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fRenderSize; + + FfxFloat32 fDilatedDepthSamples[9]; + FfxFloat32 fTmpDummy = 0.0f; + GatherDilatedDepthRQuad(fPxPosBase, + fDilatedDepthSamples[0], fDilatedDepthSamples[1], + fDilatedDepthSamples[3], fDilatedDepthSamples[4]); + GatherDilatedDepthRQuad(fUnitUv + fPxPosBase, + fTmpDummy, fDilatedDepthSamples[5], + fDilatedDepthSamples[7], fDilatedDepthSamples[8]); + fDilatedDepthSamples[2] = LoadDilatedDepth(iPxPos + FfxInt32x2(1, -1)); + fDilatedDepthSamples[6] = LoadDilatedDepth(iPxPos + FfxInt32x2(-1, 1)); + + for (FfxInt32 y = -1; y < 2; y++) + { + for (FfxInt32 x = -1; x < 2; x++) + { + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + const FfxInt32x2 iOffset = FfxInt32x2(x, y); + const FfxInt32x2 iSamplePos = iPxPos + iOffset; + + const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, iRenderSize) ? 
1.0f : 0.0f; + // FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; + FfxFloat32 fDepth = GetViewSpaceDepthInMeters(fDilatedDepthSamples[sampleIdx]) * fOnScreenFactor; + + iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); + + fDepthMin = ffxMin(fDepthMin, fDepth); + fDepthMax = ffxMax(fDepthMax, fDepth); + } + } + + return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); +} + +FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); + + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); + FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); + + float fPxDistance = length(fMotionVector * DisplaySize()); + return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; +} + +void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence, FFXM_PARAMETER_INOUT DepthClipOutputs results) +{ + // Compensate for bilinear sampling in accumulation pass + + const FfxInt32x2 iRenderSize = RenderSize(); + const FfxFloat32x2 fRenderSize = FfxFloat32x2(iRenderSize); + FfxFloat32x2 fPxPosBase = FfxFloat32x2(iPxLrPos) / fRenderSize; + FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fRenderSize; + + FFXM_MIN16_F2 fReactiveFactor = FFXM_MIN16_F2(0.0f, fMotionDivergence); + FFXM_MIN16_F fMasksSum = FFXM_MIN16_F(0.0f); + + FFXM_MIN16_F fTmpDummy = FFXM_MIN16_F(0.0f); + // Reactive samples + FFXM_MIN16_F fReactiveSamples[9]; + GatherReactiveRQuad(fPxPosBase, + fReactiveSamples[0], fReactiveSamples[1], + fReactiveSamples[3], fReactiveSamples[4]); + GatherReactiveRQuad(fUnitUv + fPxPosBase, + fTmpDummy, fReactiveSamples[5], + fReactiveSamples[7], 
fReactiveSamples[8]); + fReactiveSamples[2] = FFXM_MIN16_F(LoadReactiveMask(iPxLrPos + FfxInt32x2(1, -1))); + fReactiveSamples[6] = FFXM_MIN16_F(LoadReactiveMask(iPxLrPos + FfxInt32x2(-1, 1))); + + // Transparency and composition mask samples + FFXM_MIN16_F fTransparencyAndCompositionSamples[9]; + GatherTransparencyAndCompositionMaskRQuad(fPxPosBase, + fTransparencyAndCompositionSamples[0], fTransparencyAndCompositionSamples[1], + fTransparencyAndCompositionSamples[3], fTransparencyAndCompositionSamples[4]); + GatherTransparencyAndCompositionMaskRQuad(fUnitUv + fPxPosBase, + fTmpDummy, fTransparencyAndCompositionSamples[5], + fTransparencyAndCompositionSamples[7], fTransparencyAndCompositionSamples[8]); + fTransparencyAndCompositionSamples[2] = FFXM_MIN16_F(LoadTransparencyAndCompositionMask(iPxLrPos + FfxInt32x2(1, -1))); + fTransparencyAndCompositionSamples[6] = FFXM_MIN16_F(LoadTransparencyAndCompositionMask(iPxLrPos + FfxInt32x2(-1, 1))); + + FFXM_UNROLL + for (FfxInt32 y = -1; y < 2; y++) + { + FFXM_UNROLL + for (FfxInt32 x = -1; x < 2; x++) + { + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + fMasksSum += (fReactiveSamples[sampleIdx] + fTransparencyAndCompositionSamples[sampleIdx]); + } + } + + if (fMasksSum > FFXM_MIN16_F(0)) + { + const FfxFloat32x2 InputColorSize = FfxFloat32x2(InputColorResourceDimensions()); + FfxFloat32x2 Base = FfxFloat32x2(iPxLrPos) / InputColorSize; + FFXM_MIN16_F3 fInputColorSamples[9]; + // Input color samples + GatherInputColorRGBQuad(Base, + fInputColorSamples[0], fInputColorSamples[1], fInputColorSamples[3], fInputColorSamples[4]); + fInputColorSamples[2] = LoadInputColor(iPxLrPos + FfxInt32x2(1, -1)); + fInputColorSamples[5] = LoadInputColor(iPxLrPos + FfxInt32x2(1, 0) ); + fInputColorSamples[6] = LoadInputColor(iPxLrPos + FfxInt32x2(-1, 1)); + fInputColorSamples[7] = LoadInputColor(iPxLrPos + FfxInt32x2(0, 1) ); + fInputColorSamples[8] = LoadInputColor(iPxLrPos + FfxInt32x2(1, 1) ); + + FFXM_MIN16_F3 fReferenceColor = 
fInputColorSamples[4]; + + for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) + { + FFXM_MIN16_F3 fColorSample = fInputColorSamples[sampleIdx]; + FFXM_MIN16_F fReactiveSample = fReactiveSamples[sampleIdx]; + FFXM_MIN16_F fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; + + const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); + const FFXM_MIN16_F fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; + + // Increase power for non-similar samples + const FFXM_MIN16_F fPowerBiasMax = FFXM_MIN16_F(6.0f); + const FFXM_MIN16_F fSimilarityPower = FFXM_MIN16_F(1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax)); + const FFXM_MIN16_F fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); + const FFXM_MIN16_F fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); + + fReactiveFactor = ffxMax(fReactiveFactor, FFXM_MIN16_F2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); + } + } + + results.fDilatedReactiveMasks = fReactiveFactor; +} + +FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. 
+ FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); + +#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR + const FfxFloat32x3 fPreparedYCoCg = Tonemap(fRgb); +#else + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); +#endif + + return fPreparedYCoCg; +} + +FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); + FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); + FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); + + return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); +} + +DepthClipOutputs DepthClip(FfxInt32x2 iPxPos) +{ + FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + + // Discard tiny mvs + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); + + const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); + + DepthClipOutputs results; + + // Compute prepared input color and depth clip + FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); + FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); + results.fTonemapped = FfxFloat32x4(fPreparedYCoCg, fDepthClip); + + // Compute dilated reactive mask +#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxPos; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); +#endif + + FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); + FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - 
ComputeDepthDivergence(iPxPos)); + + PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence), results); + + return results; +} + +#endif //!defined( FFXM_FSR2_DEPTH_CLIPH ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta new file mode 100644 index 0000000..c35e41a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 6578e7c7d02073e48926d1974b4d6c92 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h new file mode 100644 index 0000000..b78afdc --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h @@ -0,0 +1,131 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#ifndef FFXM_FSR2_LOCK_H +#define FFXM_FSR2_LOCK_H + +void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) +{ + if (all(FFXM_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) + { +#if FFXM_FSR2_OPTION_INVERTED_DEPTH + const FfxUInt32 farZ = 0x0; +#else + const FfxUInt32 farZ = 0x3f800000; +#endif + SetReconstructedDepth(iPxHrPos, farZ); + } +} + +FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) +{ + const FfxInt32 RADIUS = 1; + + FFXM_MIN16_F fNucleus = LoadLockInputLuma(pos); + + FFXM_MIN16_F similar_threshold = FFXM_MIN16_F(1.05f); + FFXM_MIN16_F dissimilarLumaMin = FFXM_MIN16_F(FSR2_FP16_MAX); + FFXM_MIN16_F dissimilarLumaMax = FFXM_MIN16_F(0); + + /* + 0 1 2 + 3 4 5 + 6 7 8 + */ + + #define SETBIT(x) (1U << x) + + FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar + + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { + SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left + SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right + SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left + SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right + }; + + FFXM_MIN16_F lumaSamples [9]; + FFXM_MIN16_F fTmpDummy = FFXM_MIN16_F(0.0f); + const FfxFloat32x2 fInputLumaSize = FfxFloat32x2(RenderSize()); + const FfxFloat32x2 fPxBaseUv = FfxFloat32x2(pos) / fInputLumaSize; + const FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fInputLumaSize; + + // Gather samples + GatherLockInputLumaRQuad(fPxBaseUv, + lumaSamples[0], lumaSamples[1], + lumaSamples[3], lumaSamples[4]); + GatherLockInputLumaRQuad(fUnitUv + fPxBaseUv, + fTmpDummy, lumaSamples[5], + lumaSamples[7], lumaSamples[8]); + lumaSamples[2] = LoadLockInputLuma(pos + FfxInt32x2(1, -1)); + lumaSamples[6] = LoadLockInputLuma(pos + FfxInt32x2(-1, 1)); + + FfxInt32 idx = 0; + FFXM_UNROLL + for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) { + FFXM_UNROLL + for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) { + if (x == 0 && y == 0) continue; + 
+ FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + FFXM_MIN16_F sampleLuma = lumaSamples[sampleIdx]; + + FFXM_MIN16_F difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); + + if (difference > FFXM_MIN16_F(0) && (difference < similar_threshold)) { + mask |= SETBIT(idx); + } else { + dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma); + dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma); + } + } + } + + FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin; + + if (FFXM_FALSE == isRidge) { + + return false; + } + + FFXM_UNROLL + for (FfxInt32 i = 0; i < 4; i++) { + + if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) { + return false; + } + } + + return true; +} + +void ComputeLock(FfxInt32x2 iPxLrPos) +{ + if (ComputeThinFeatureConfidence(iPxLrPos)) + { + StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); + } + + ClearResourcesForNextFrame(iPxLrPos); +} + +#endif // FFXM_FSR2_LOCK_H diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta new file mode 100644 index 0000000..f399aac --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 920d5b937231132469bcb0f2a38d2d80 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + 
Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h new file mode 100644 index 0000000..8d6e0bf --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h @@ -0,0 +1,101 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H +#define FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H + +FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) +{ + return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); +} + +#if FFXM_HALF +FFXM_MIN16_F4 WrapShadingChangeLuma(FFXM_MIN16_I2 iPxSample) +{ + return FFXM_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); +} +#endif + +#if FFXM_HALF +DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +DeclareCustomTextureSampleMin16(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) +#else +DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) +#endif + +FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) +{ + FfxFloat32 fShadingChangeLuma = 0; + const FfxFloat32 fDiv = FfxFloat32(FfxInt32(2) << LumaMipLevelToUse()); + FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); + + fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); + fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); + + fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); + + return fShadingChangeLuma; +} + +void UpdateLockStatus(AccumulationPassCommonParams params, + FFXM_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, + FFXM_PARAMETER_INOUT FfxFloat32x2 fLockStatus, + FFXM_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, + FFXM_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { + + const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); + 
+ //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? + fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; + + FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; + + fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); + + if (state.NewLock) { + fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; + + fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; + } + else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { + fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); + } + else { + if (fLuminanceDiff > 0.1f) { + KillLock(fLockStatus); + } + } + + fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); + fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); + + fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); + fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); + + // Compute this frame lock contribution + const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); + const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); + + fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); +} + +#endif //!defined( FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta new file mode 100644 index 0000000..b5dce57 
--- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 976d18e7892c5c444bbcb4d17322fefb +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h new file mode 100644 index 0000000..d60784b --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h @@ -0,0 +1,91 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "./ffxm_core.h" + +struct RCASOutputs +{ + FfxFloat32x3 fUpscaledColor; +}; + +#if FFXM_HALF +#define USE_FSR_RCASH 1 +#else +#define USE_FSR_RCASH 0 +#endif + +#if USE_FSR_RCASH +#define FSR_RCAS_H 1 +FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p) +{ + FfxFloat16x4 fColor = LoadRCAS_Input(p); + fColor.rgb = FfxFloat16x3(PrepareRgb(fColor.rgb, Exposure(), PreExposure())); + return fColor; +} +void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) +{ + +} + +#else +#define FSR_RCAS_F 1 +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) +{ + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + + return fColor; +} +void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} +#endif + +#include "./fsr1/ffxm_fsr1.h" + +void CurrFilter(FFXM_MIN16_U2 pos, FFXM_PARAMETER_INOUT RCASOutputs results) +{ +#if USE_FSR_RCASH + FfxFloat16x3 c; + FsrRcasH(c.r, c.g, c.b, pos, RCASConfig()); + + c = UnprepareRgb(c, FfxFloat16(Exposure())); +#else + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c = UnprepareRgb(c, Exposure()); +#endif + results.fUpscaledColor = c; +} + +RCASOutputs RCAS(FfxUInt32x2 gxy) +{ +#ifdef FFXM_HLSL + RCASOutputs results = (RCASOutputs)0; +#else + RCASOutputs results; +#endif + CurrFilter(FFXM_MIN16_U2(gxy), results); + return results; +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta new file mode 100644 index 0000000..73e1f49 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 6113e44b0d068db4c954804a6ce38739 +PluginImporter: + 
externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h new file mode 100644 index 0000000..59bf246 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -0,0 +1,155 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H +#define FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H + +struct ReconstructPrevDepthOutputs +{ + FfxFloat32 fDepth; + FfxFloat32x2 fMotionVector; + FfxFloat32 fLuma; +}; + + +void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) +{ + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); + + FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + // Project current depth into previous frame locations. + // Push to all pixels having some contribution if reprojection is using bilinear logic. 
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, iPxDepthSize)) { + StoreReconstructedDepth(iStorePos, fDepth); + } + } + } +} + +void FindNearestDepth(FFXM_PARAMETER_IN FfxInt32x2 iPxPos, FFXM_PARAMETER_IN FfxInt32x2 iPxSize, FFXM_PARAMETER_OUT FfxFloat32 fNearestDepth, FFXM_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) +{ + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(+0, +1), + FfxInt32x2(+0, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+1, +1), + FfxInt32x2(-1, -1), + FfxInt32x2(+1, -1), + }; + + // pull out the depth loads to allow SC to batch them + FfxFloat32 depth[9]; + FfxInt32 iSampleIndex = 0; + FFXM_UNROLL + for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + depth[iSampleIndex] = LoadInputDepth(iPos); + } + + // find closest depth + fNearestDepthCoord = iPxPos; + fNearestDepth = depth[0]; + FFXM_UNROLL + for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + if (IsOnScreen(iPos, iPxSize)) { + + FfxFloat32 fNdDepth = depth[iSampleIndex]; +#if FFXM_FSR2_OPTION_INVERTED_DEPTH + if (fNdDepth > fNearestDepth) { +#else + if (fNdDepth < fNearestDepth) { +#endif + fNearestDepthCoord = iPos; + fNearestDepth = fNdDepth; + } + } + } +} + +FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. 
+ FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + // Use internal auto exposure for locking logic + fRgb /= PreExposure(); + fRgb *= Exposure(); + +#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT + fRgb = Tonemap(fRgb); +#endif + + //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! + const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); + + return fLockInputLuma; +} + +ReconstructPrevDepthOutputs ReconstructAndDilate(FfxInt32x2 iPxLrPos) +{ + FfxFloat32 fDilatedDepth; + FfxInt32x2 iNearestDepthCoord; + + FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord); + +#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxLrPos; + FfxInt32x2 iMotionVectorPos = iNearestDepthCoord; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos); + FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord); +#endif + + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); + + ReconstructPrevDepthOutputs results; + + results.fDepth = fDilatedDepth; + results.fMotionVector = fDilatedMotionVector; + ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); + FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); + results.fLuma = fLockInputLuma; + + return results; +} + + +#endif //!defined( FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta new file mode 100644 index 0000000..945e0ce --- /dev/null +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 5e29326796d407b41b4d8a450bbb8fac +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h new file mode 100644 index 0000000..752a39a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h @@ -0,0 +1,386 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#ifndef FFXM_FSR2_REPROJECT_H
+#define FFXM_FSR2_REPROJECT_H
+
+#if FFXM_HALF
+FFXM_MIN16_F4 WrapHistory(FFXM_MIN16_I2 iPxSample) // integer-coordinate history load, converted to min16
+{
+    return FFXM_MIN16_F4(LoadHistory(iPxSample));
+}
+FFXM_MIN16_F4 SampleHistory(FfxFloat32x2 fUV) // UV-addressed history fetch, forwarded to SampleUpscaledHistory
+{
+    return SampleUpscaledHistory(fUV);
+}
+#else
+FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) // integer-coordinate history load
+{
+    return LoadHistory(iPxSample);
+}
+FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) // UV-addressed history fetch, forwarded to SampleUpscaledHistory
+{
+    return SampleUpscaledHistory(fUV);
+}
+#endif
+
+
+#if FFXM_HALF
+
+#define FFXM_FSR2_REPROJECT_CATMULL_9TAP 0
+#define FFXM_FSR2_REPROJECT_LANCZOS_APPROX_9TAP 1
+#define FFXM_FSR2_REPROJECT_CATMULL_5TAP 2
+
+#if FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_5TAP
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_5TAP
+#elif FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_9TAP
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_9TAP
+#else // QUALITY
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_9TAP
+#endif
+
+#if (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_CATMULL_9TAP)
+struct CatmullRomSamples9Tap
+{
+    // bilinear sampling UV coordinates: [0] = first tap, [1] = last tap, [2] = merged middle taps
+    FfxFloat32x2 UV[3];
+
+    // per-axis weights: [0] = w0, [1] = w1 + w2 (pairs with UV[2]), [2] = w3 (pairs with UV[1])
+    FFXM_MIN16_F2 Weight[3];
+
+    // final multiplier: not written or read anywhere in this header (all nine taps are kept, weights sum to 1)
+    FFXM_MIN16_F FinalMultiplier;
+};
+
+CatmullRomSamples9Tap Get2DCatmullRom9Kernel(FfxFloat32x2 uv, FfxFloat32x2 size, in FfxFloat32x2 invSize) // builds per-axis tap UVs and weights for a 3x3 bilinear-tap Catmull-Rom fetch
+{
+    CatmullRomSamples9Tap catmullSamples;
+    FfxFloat32x2 samplePos = uv * size;
+    FfxFloat32x2 texPos1 = floor(samplePos - 0.5f) + 0.5f; // centre of the texel at or before the sample position
+    FfxFloat32x2 f = samplePos - texPos1;
+
+    FfxFloat32x2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
+    FfxFloat32x2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
+    FfxFloat32x2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
+    FfxFloat32x2 w3 = f * f * (-0.5f + 0.5f * f);
+
+    catmullSamples.Weight[0] = FFXM_MIN16_F2(w0);
+    catmullSamples.Weight[1] = FFXM_MIN16_F2(w1 + w2);
+    catmullSamples.Weight[2] = FFXM_MIN16_F2(w3);
+
+    FfxFloat32x2 offset12 = w2 / (w1 + w2); // where between the two middle texels a single bilinear fetch reproduces w1 and w2
+
+    // Compute the final UV coordinates we'll use for sampling the texture
+    catmullSamples.UV[0] = FfxFloat32x2(texPos1 - 1);
+    catmullSamples.UV[1] = FfxFloat32x2(texPos1 + 2);
+    catmullSamples.UV[2] = FfxFloat32x2(texPos1 + offset12);
+
+    catmullSamples.UV[0] = FfxFloat32x2(catmullSamples.UV[0]*invSize);
+    catmullSamples.UV[1] = FfxFloat32x2(catmullSamples.UV[1]*invSize);
+    catmullSamples.UV[2] = FfxFloat32x2(catmullSamples.UV[2]*invSize);
+    return catmullSamples;
+}
+
+FFXM_MIN16_F4 HistorySample(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) // 9-tap Catmull-Rom history fetch with optional deringing clamp
+{
+    // Tap positions and weights come from the kernel helper; no texel-space position is needed here.
+    FfxFloat32x2 fTextureSize = FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 fInvTextureSize = FfxFloat32x2(1.0f, 1.0f) / fTextureSize;
+    CatmullRomSamples9Tap samples = Get2DCatmullRom9Kernel(fUvSample, fTextureSize, fInvTextureSize);
+
+    FFXM_MIN16_F4 fColor = FFXM_MIN16_F4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    FFXM_MIN16_F4 fColor00 = SampleHistory(FfxFloat32x2(samples.UV[0])); // row UV[0].y
+    fColor += fColor00 * samples.Weight[0].x * samples.Weight[0].y;
+    FFXM_MIN16_F4 fColor20 = SampleHistory(FfxFloat32x2(samples.UV[2].x, samples.UV[0].y));
+    fColor += fColor20 * samples.Weight[1].x * samples.Weight[0].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1].x, samples.UV[0].y)) * samples.Weight[2].x * samples.Weight[0].y;
+
+    FFXM_MIN16_F4 fColor02 = SampleHistory(FfxFloat32x2(samples.UV[0].x, samples.UV[2].y)); // row UV[2].y
+    fColor += fColor02 * samples.Weight[0].x * samples.Weight[1].y; // reuse the tap fetched above instead of sampling it again
+    FFXM_MIN16_F4 fColor22 = SampleHistory(FfxFloat32x2(samples.UV[2]));
+    fColor += fColor22 * samples.Weight[1].x * samples.Weight[1].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1].x, samples.UV[2].y)) * samples.Weight[2].x * samples.Weight[1].y;
+
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[0].x, samples.UV[1].y)) * samples.Weight[0].x * samples.Weight[2].y; // row UV[1].y
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[2].x, samples.UV[1].y)) * samples.Weight[1].x * samples.Weight[2].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1])) * samples.Weight[2].x * samples.Weight[2].y;
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    const FFXM_MIN16_F4 fDeringingSamples[4] = {fColor00, fColor20, fColor02, fColor22}; // raw (unweighted) taps
+
+    FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0];
+    FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0];
+
+    FFXM_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < 4; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+    fColor = clamp(fColor, fDeringingMin, fDeringingMax); // keep the filtered result inside the range of those taps
+#endif
+    return fColor;
+}
+#elif (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_CATMULL_5TAP)
+#define ARM_CATMULL_5TAP_SAMPLE_COUNT 5
+struct CatmullRomSamples
+{
+    // bilinear sampling UV coordinates of the samples
+    FfxFloat32x2 UV[ARM_CATMULL_5TAP_SAMPLE_COUNT];
+    // weights of the samples
+    FFXM_MIN16_F Weight[ARM_CATMULL_5TAP_SAMPLE_COUNT];
+    // final multiplier: 1 / (sum of the five kept weights); scaling the summed colour once is cheaper than rescaling every weight
+    FFXM_MIN16_F FinalMultiplier;
+};
+
+void Bicubic2DCatmullRom(in FfxFloat32x2 uv, in FfxFloat32x2 size, in FfxFloat32x2 invSize, FFXM_PARAMETER_OUT FfxFloat32x2 samples[3], FFXM_PARAMETER_OUT FfxFloat32x2 weights[3]) // per-axis tap UVs and weights; the two middle taps are merged into samples[1] / weights[1]
+{
+    uv *= size;
+    FfxFloat32x2 tc = floor(uv - 0.5) + 0.5;
+    FfxFloat32x2 f = uv - tc;
+    FfxFloat32x2 f2 = f * f;
+    FfxFloat32x2 f3 = f2 * f;
+    FfxFloat32x2 w0 = f2 - 0.5 * (f3 + f);
+    FfxFloat32x2 w1 = 1.5 * f3 - 2.5 * f2 + 1.f;
+    FfxFloat32x2 w3 = 0.5 * (f3 - f2);
+    FfxFloat32x2 w2 = 1.f - w0 - w1 - w3; // the four weights sum to 1 by construction
+
+    samples[0] = tc - 1.f;
+    samples[1] = tc + w2 / (w1 + w2);
+    samples[2] = tc + 2.f;
+
+    samples[0] *= invSize;
+    samples[1] *= invSize;
+    samples[2] *= invSize;
+    weights[0] = w0;
+    weights[1] = w1 + w2;
+    weights[2] = w3;
+}
+
+CatmullRomSamples GetBicubic2DCatmullRomSamples(FfxFloat32x2 uv, FfxFloat32x2 size, in FfxFloat32x2 invSize) // cross-shaped 5-tap kernel: the 3x3 tap grid without its four corners
+{
+    FfxFloat32x2 weights[3];
+    FfxFloat32x2 samples[3];
+    Bicubic2DCatmullRom(uv, size, invSize, samples, weights);
+
+    CatmullRomSamples crSamples;
+    // optimized by removing corner samples
+    crSamples.UV[0] = FfxFloat32x2(samples[1].x, samples[0].y);
+    crSamples.UV[1] = FfxFloat32x2(samples[0].x, samples[1].y);
+    crSamples.UV[2] = FfxFloat32x2(samples[1].x, samples[1].y);
+    crSamples.UV[3] = FfxFloat32x2(samples[2].x, samples[1].y);
+    crSamples.UV[4] = FfxFloat32x2(samples[1].x, samples[2].y);
+
+    crSamples.Weight[0] = FFXM_MIN16_F(weights[1].x * weights[0].y);
+    crSamples.Weight[1] = FFXM_MIN16_F(weights[0].x * weights[1].y);
+    crSamples.Weight[2] = FFXM_MIN16_F(weights[1].x * weights[1].y);
+    crSamples.Weight[3] = FFXM_MIN16_F(weights[2].x * weights[1].y);
+    crSamples.Weight[4] = FFXM_MIN16_F(weights[1].x * weights[2].y);
+
+    // reweight after removing the corners (despite its name, cornerWeights sums the five KEPT weights)
+    FFXM_MIN16_F cornerWeights;
+    cornerWeights = crSamples.Weight[0];
+    cornerWeights += crSamples.Weight[1];
+    cornerWeights += crSamples.Weight[2];
+    cornerWeights += crSamples.Weight[3];
+    cornerWeights += crSamples.Weight[4];
+    crSamples.FinalMultiplier = FFXM_MIN16_F(1.f / cornerWeights);
+    return crSamples;
+}
+
+FFXM_MIN16_F4 HistorySample(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) // 5-tap Catmull-Rom history fetch, renormalised, with optional deringing clamp
+{
+    FfxFloat32x2 fTextureSize = FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 fInvTextureSize = FfxFloat32x2(1.0f, 1.0f) / fTextureSize;
+    CatmullRomSamples samples = GetBicubic2DCatmullRomSamples(fUvSample, fTextureSize, fInvTextureSize);
+
+    FFXM_MIN16_F4 fTap = SampleHistory(FfxFloat32x2(samples.UV[0])); // raw tap, kept separate from its weighted contribution
+    FFXM_MIN16_F4 fColor = fTap * samples.Weight[0];
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    FFXM_MIN16_F4 fDeringingMin = fTap; // bounds are built from raw taps, matching the 9-tap path
+    FFXM_MIN16_F4 fDeringingMax = fTap;
+#endif
+    for(FfxInt32 iSampleIndex = 1; iSampleIndex < ARM_CATMULL_5TAP_SAMPLE_COUNT; iSampleIndex++)
+    {
+        fTap = SampleHistory(FfxFloat32x2(samples.UV[iSampleIndex]));
+        fColor += fTap * samples.Weight[iSampleIndex];
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+        fDeringingMin = ffxMin(fDeringingMin, fTap);
+        fDeringingMax = ffxMax(fDeringingMax, fTap);
+#endif
+    }
+    fColor *= samples.FinalMultiplier; // the five kept weights no longer sum to 1 once the corners are dropped
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    fColor = clamp(fColor, fDeringingMin, fDeringingMax);
+#endif
+    return fColor;
+}
+#elif (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_LANCZOS_APPROX_9TAP)
+
+Fetched9TapSamplesMin16 FetchHistorySamples(FfxInt32x2 iPxSample, FfxInt32x2 iTextureSize) // fills the 3x3 tap set: one gather for four taps plus five point loads
+{
+    Fetched9TapSamplesMin16 Samples;
+    FfxFloat32x2 iSrcInputUv = FfxFloat32x2(iPxSample) / FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(iTextureSize);
+
+    // Collect samples
+    GatherHistoryColorRGBQuad(FfxFloat32x2(-0.5, -0.5) * unitOffsetUv + iSrcInputUv,
+        Samples.fColor00, Samples.fColor10, Samples.fColor01, Samples.fColor11);
+    Samples.fColor20 = WrapHistory(FfxFloat32x2(1, -1) + iPxSample);
+    Samples.fColor21 = WrapHistory(FfxFloat32x2(1, 0) + iPxSample);
+    Samples.fColor02 = WrapHistory(FfxFloat32x2(-1, 1) + iPxSample);
+    Samples.fColor12 = WrapHistory(FfxFloat32x2(0, 1) + iPxSample);
+    Samples.fColor22 = WrapHistory(FfxFloat32x2(1, 1) + iPxSample);
+
+    return Samples;
+}
+//DeclareCustomFetch9TapSamplesMin16(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSampleMin16(HistorySample, Lanczos2Approx, FetchHistorySamples)
+#endif // FFXM_FSR2_REPROJECT_MODE
+
+#else // !FFXM_HALF
+
+#ifndef FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
+#endif
+DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSample(HistorySample, FFXM_FSR2_GET_LANCZOS_SAMPLER1D(FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
+#endif
+
+FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) // lock status in .xy, zero in .zw
+{
+    FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
+    return fSample;
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F4 WrapLockStatus(FFXM_MIN16_I2 iPxSample) // min16 overload of the function above
+{
+    FFXM_MIN16_F4 fSample = FFXM_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
+
+    return fSample;
+}
+#endif
+
+#if FFXM_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#else
+DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#endif
+
+FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) // picks the dilated (render-size) or input motion vector depending on the build option
+{
+#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FfxInt32x2(fHrUv * RenderSize()));
+#else
+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos);
+#endif
+
+    return fDilatedMotionVector;
+}
+
+FfxBoolean IsUvInside(FfxFloat32x2 fUv) // true when both components lie in [0, 1], bounds included
+{
+    return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f);
+}
+
+void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFXM_PARAMETER_OUT FfxBoolean bIsExistingSample) // outputs fHrUv + fMotionVector and whether that UV is still inside [0, 1]
+{
+    fReprojectedHrUv = params.fHrUv + params.fMotionVector;
+
+    bIsExistingSample = IsUvInside(fReprojectedHrUv);
+}
+
+#if !FFXM_HALF
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFXM_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFXM_PARAMETER_OUT FfxBoolean bInMotionLastFrame) // fetches history at the reprojected UV and unpacks colour plus the two values carried in .w
+{
+    FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
+
+    fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure());
+
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = RGBToYCoCg(fHistoryColor);
+#endif
+
+    //Compute temporal reactivity info
+    fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); // magnitude of .w is the reactive factor
+    bInMotionLastFrame = (fHistory.w < 0.0f); // sign of .w is the motion flag
+}
+
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) // reads the new-lock intensity at this pixel and the lock status at the reprojected UV
+{
+    LockState state = { FFXM_FALSE, FFXM_FALSE };
+    const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0; // NOTE(review): not read again in this function
+
+    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
+
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+
+    return state;
+}
+#else //FFXM_HALF
+
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat16x3 fHistoryColor, FFXM_PARAMETER_OUT FfxFloat16 fTemporalReactiveFactor, FFXM_PARAMETER_OUT FfxBoolean bInMotionLastFrame) // half-precision variant of the function above
+{
+    FfxFloat16x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
+
+    fHistoryColor = FfxFloat16x3(PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()));
+
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = RGBToYCoCg(fHistoryColor);
+#endif
+
+    //Compute temporal reactivity info
+#if FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+    fTemporalReactiveFactor = FfxFloat16(ffxSaturate(abs(SampleTemporalReactive(params.fReprojectedHrUv))));
+#else
+    fTemporalReactiveFactor = FfxFloat16(ffxSaturate(abs(fHistory.w)));
+#endif
+    bInMotionLastFrame = (fHistory.w < 0.0f); // read from history .w in both configurations
+}
+
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat16x2 fReprojectedLockStatus) // half-precision variant of the function above
+{
+    LockState state = { FFXM_FALSE, FFXM_FALSE };
+    const FfxFloat16 fNewLockIntensity = FfxFloat16(LoadRwNewLocks(params.iPxHrPos));
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    FfxFloat16 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : FfxFloat16(0); // NOTE(review): not read again in this function
+
+    fReprojectedLockStatus = FfxFloat16x2(SampleLockStatus(params.fReprojectedHrUv));
+
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat16(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+    return state;
+}
+
+#endif
+
+#endif //!defined( FFXM_FSR2_REPROJECT_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta
new file mode 100644
index 0000000..81fe61c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: b997a83902840b04fbecef298bd4b620
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 1
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      : Any
+    second:
+      enabled: 0
+      settings:
+        Exclude Android: 1
+        Exclude Editor: 1
+        Exclude GameCoreScarlett: 1
+        Exclude GameCoreXboxOne: 1
+        Exclude Linux64: 1
+        Exclude OSXUniversal: 1
+        Exclude PS4: 1
+        Exclude PS5: 1
+        Exclude WebGL: 1
+        Exclude Win: 1
+        Exclude Win64: 1
+  - first:
+      Any: 
+    second:
+      enabled: 0
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  - first:
+      Standalone: Linux64
+    second:
+      enabled: 0
+      settings:
+        CPU: None
+  - first:
+      Standalone: OSXUniversal
+    second:
+      enabled: 0
+      settings:
+        CPU: None
+  - first:
+      Standalone: Win
+    second:
+      enabled: 0
+      settings:
+        CPU: None
+  - first:
+      Standalone: Win64
+    second:
+      enabled: 0
+      settings:
+        CPU: None
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h new file mode 100644 index 0000000..fb1fae9 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h @@ -0,0 +1,100 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#ifndef FFXM_FSR2_RESOURCES_H +#define FFXM_FSR2_RESOURCES_H + +#if defined(FFXM_CPU) || defined(FFXM_GPU) +#define FFXM_FSR2_RESOURCE_IDENTIFIER_NULL 0 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 +#define FFXM_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as 
FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 45 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 46 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 47 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 48 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 49 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 50 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE 51 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE_1 52 +#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE_2 53 + +// Shading change detection mip level setting, value must be in the range [FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE 
FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +#define FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL (FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) + +#define FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT 54 + +#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 +#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 + +#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 +#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 +#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 +#define FFXM_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 + +#endif // #if defined(FFXM_CPU) || defined(FFXM_GPU) + +#endif //!defined( FFXM_FSR2_RESOURCES_H ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta new file mode 100644 index 0000000..8141bdd --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 80c7963a01a0e5c4bb69e6b897267a9b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + 
settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h new file mode 100644 index 0000000..8c5408f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h @@ -0,0 +1,699 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_SAMPLE_H
+#define FFXM_FSR2_SAMPLE_H
+
+// suppress warnings
+#ifdef FFXM_HLSL
+#pragma warning(disable: 4008) // potentially divide by zero
+#endif //FFXM_HLSL
+
+struct FetchedBilinearSamples { // 2x2 tap set; in fColorXY, X is the horizontal index and Y the vertical one (see Bilinear)
+
+    FfxFloat32x4 fColor00;
+    FfxFloat32x4 fColor10;
+
+    FfxFloat32x4 fColor01;
+    FfxFloat32x4 fColor11;
+};
+
+struct FetchedBicubicSamples { // 4x4 tap set, same fColorXY naming; each group of four below is one row
+
+    FfxFloat32x4 fColor00;
+    FfxFloat32x4 fColor10;
+    FfxFloat32x4 fColor20;
+    FfxFloat32x4 fColor30;
+
+    FfxFloat32x4 fColor01;
+    FfxFloat32x4 fColor11;
+    FfxFloat32x4 fColor21;
+    FfxFloat32x4 fColor31;
+
+    FfxFloat32x4 fColor02;
+    FfxFloat32x4 fColor12;
+    FfxFloat32x4 fColor22;
+    FfxFloat32x4 fColor32;
+
+    FfxFloat32x4 fColor03;
+    FfxFloat32x4 fColor13;
+    FfxFloat32x4 fColor23;
+    FfxFloat32x4 fColor33;
+};
+
+#if FFXM_HALF
+struct FetchedBilinearSamplesMin16 { // min16 counterpart of FetchedBilinearSamples
+
+    FFXM_MIN16_F4 fColor00;
+    FFXM_MIN16_F4 fColor10;
+
+    FFXM_MIN16_F4 fColor01;
+    FFXM_MIN16_F4 fColor11;
+};
+
+struct FetchedBicubicSamplesMin16 { // min16 counterpart of FetchedBicubicSamples
+
+    FFXM_MIN16_F4 fColor00;
+    FFXM_MIN16_F4 fColor10;
+    FFXM_MIN16_F4 fColor20;
+    FFXM_MIN16_F4 fColor30;
+
+    FFXM_MIN16_F4 fColor01;
+    FFXM_MIN16_F4 fColor11;
+    FFXM_MIN16_F4 fColor21;
+    FFXM_MIN16_F4 fColor31;
+
+    FFXM_MIN16_F4 fColor02;
+    FFXM_MIN16_F4 fColor12;
+    FFXM_MIN16_F4 fColor22;
+    FFXM_MIN16_F4 fColor32;
+
+    FFXM_MIN16_F4 fColor03;
+    FFXM_MIN16_F4 fColor13;
+    FFXM_MIN16_F4 fColor23;
+    FFXM_MIN16_F4 fColor33;
+};
+
+struct Fetched9TapSamplesMin16 { // 3x3 tap set in min16; declared only when FFXM_HALF is set (no float alias below)
+
+    FFXM_MIN16_F4 fColor00;
+    FFXM_MIN16_F4 fColor10;
+    FFXM_MIN16_F4 fColor20;
+
+    FFXM_MIN16_F4 fColor01;
+    FFXM_MIN16_F4 fColor11;
+    FFXM_MIN16_F4 fColor21;
+
+    FFXM_MIN16_F4 fColor02;
+    FFXM_MIN16_F4 fColor12;
+    FFXM_MIN16_F4 fColor22;
+};
+
+#else //FFXM_HALF
+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
+#endif //FFXM_HALF
+
+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) // returns A at t = 0 and B at t = 1; t is not clamped here
+{
+    return A + (B - A) * t;
+}
+
+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) // blends each row along x, then the two rows along y
+{
+    FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
+    return fColorXY;
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F4 Linear(FFXM_MIN16_F4 A, FFXM_MIN16_F4 B, FFXM_MIN16_F t) // min16 overload of Linear
+{
+    return A + (B - A) * t;
+}
+
+FFXM_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFXM_MIN16_F2 fPxFrac) // min16 overload of Bilinear
+{
+    FFXM_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FFXM_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    FFXM_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y);
+    return fColorXY;
+}
+#endif
+
+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) // sin(PI x)/(PI x) * sin(PI x / 2)/(PI x / 2); x is not range-limited here
+{
+    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
+    return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); // returns 1 near zero instead of evaluating 0/0
+}
+
+FfxFloat32 Lanczos2(FfxFloat32 x) // same kernel with |x| limited to 2 before evaluation
+{
+    x = ffxMin(abs(x), 2.0f);
+    return Lanczos2NoClamp(x);
+}
+
+#if FFXM_HALF
+
+#if 1
+FFXM_MIN16_F Lanczos2NoClamp(FFXM_MIN16_F x) // min16 overload; the enclosing '#if 1' is always taken
+{
+    const FFXM_MIN16_F PI = FFXM_MIN16_F(3.141592653589793f); // TODO: share SDK constants
+    return abs(x) < FFXM_MIN16_F(FSR2_EPSILON) ? FFXM_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFXM_MIN16_F(0.5f) * PI * x) / (FFXM_MIN16_F(0.5f) * PI * x));
+}
+#endif
+
+FFXM_MIN16_F Lanczos2(FFXM_MIN16_F x) // min16 overload; |x| limited to 2 before evaluation
+{
+    x = ffxMin(abs(x), FFXM_MIN16_F(2.0f));
+    return FFXM_MIN16_F(Lanczos2NoClamp(x));
+}
+#endif //FFXM_HALF
+
+// FSR1 lanczos approximation. Input is x*x and must be <= 4.
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) // polynomial stand-in for Lanczos-2, evaluated on the squared distance
+{
+    FfxFloat32 fBase = (2.0f / 5.0f) * x2 - 1;
+    FfxFloat32 fWindow = (1.0f / 4.0f) * x2 - 1;
+    return ((25.0f / 16.0f) * fBase * fBase - (25.0f / 16.0f - 1)) * (fWindow * fWindow);
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2ApproxSqNoClamp(FFXM_MIN16_F x2) // min16 overload, every constant converted individually
+{
+    FFXM_MIN16_F fBase = FFXM_MIN16_F(2.0f / 5.0f) * x2 - FFXM_MIN16_F(1);
+    FFXM_MIN16_F fWindow = FFXM_MIN16_F(1.0f / 4.0f) * x2 - FFXM_MIN16_F(1);
+    return (FFXM_MIN16_F(25.0f / 16.0f) * fBase * fBase - FFXM_MIN16_F(25.0f / 16.0f - 1)) * (fWindow * fWindow);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) // limits the squared distance to 4, then evaluates the polynomial
+{
+    const FfxFloat32 fLimitedX2 = ffxMin(x2, 4.0f);
+    return Lanczos2ApproxSqNoClamp(fLimitedX2);
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2ApproxSq(FFXM_MIN16_F x2) // min16 overload
+{
+    const FFXM_MIN16_F fLimitedX2 = ffxMin(x2, FFXM_MIN16_F(4.0f));
+    return Lanczos2ApproxSqNoClamp(fLimitedX2);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) // takes the distance itself, squares it, applies no limit
+{
+    return Lanczos2ApproxSqNoClamp(x * x);
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2ApproxNoClamp(FFXM_MIN16_F x) // min16 overload
+{
+    return Lanczos2ApproxSqNoClamp(x * x);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2Approx(FfxFloat32 x) // takes the distance itself; squaring and the limit of 4 happen downstream
+{
+    return Lanczos2ApproxSq(x * x);
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2Approx(FFXM_MIN16_F x) // min16 overload
+{
+    return Lanczos2ApproxSq(x * x);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) // weight looked up through SampleLanczos2Weight using |x|
+{
+    return SampleLanczos2Weight(abs(x));
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2_UseLUT(FFXM_MIN16_F x) // min16 overload; the looked-up weight is converted on return
+{
+    return FFXM_MIN16_F(SampleLanczos2Weight(abs(x)));
+}
+#endif //FFXM_HALF
+
+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) // weighted mean of four taps placed at -1, 0, +1, +2 relative to t
+{
+    FfxFloat32 fTapWeight0 = Lanczos2_UseLUT(-1.f - t);
+    FfxFloat32 fTapWeight1 = Lanczos2_UseLUT(-0.f - t);
+    FfxFloat32 fTapWeight2 = Lanczos2_UseLUT(+1.f - t);
+    FfxFloat32 fTapWeight3 = Lanczos2_UseLUT(+2.f - t);
+    return (fTapWeight0 * fColor0 + fTapWeight1 * fColor1 + fTapWeight2 * fColor2 + fTapWeight3 * fColor3) / (fTapWeight0 + fTapWeight1 + fTapWeight2 + fTapWeight3); // dividing by the weight sum normalises the result
+}
+#if FFXM_HALF
+FFXM_MIN16_F4 Lanczos2_UseLUT(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, FFXM_MIN16_F t) +{ + FFXM_MIN16_F fWeight0 = Lanczos2_UseLUT(FFXM_MIN16_F(-1.f) - t); + FFXM_MIN16_F fWeight1 = Lanczos2_UseLUT(FFXM_MIN16_F(-0.f) - t); + FFXM_MIN16_F fWeight2 = Lanczos2_UseLUT(FFXM_MIN16_F(+1.f) - t); + FFXM_MIN16_F fWeight3 = Lanczos2_UseLUT(FFXM_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif + +FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + 
FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} + +#if FFXM_HALF +FFXM_MIN16_F4 Lanczos2(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, FFXM_MIN16_F t) +{ + FFXM_MIN16_F fWeight0 = Lanczos2(FFXM_MIN16_F(-1.f) - t); + FFXM_MIN16_F fWeight1 = Lanczos2(FFXM_MIN16_F(-0.f) - t); + FFXM_MIN16_F fWeight2 = Lanczos2(FFXM_MIN16_F(+1.f) - t); + FFXM_MIN16_F fWeight3 = Lanczos2(FFXM_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FFXM_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac) +{ + FFXM_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFXM_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFXM_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFXM_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFXM_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFXM_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + 
FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} + +FFXM_MIN16_F4 Lanczos2(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F t) +{ + FFXM_MIN16_F fWeight0 = Lanczos2(FFXM_MIN16_F(-1.f) - t); + FFXM_MIN16_F fWeight1 = Lanczos2(FFXM_MIN16_F(-0.f) - t); + FFXM_MIN16_F fWeight2 = Lanczos2(FFXM_MIN16_F(+1.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2) / (fWeight0 + fWeight1 + fWeight2); +} + +FFXM_MIN16_F4 Lanczos2Approx(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F t) +{ + FFXM_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-1.f) - t); + FFXM_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-0.f) - t); + FFXM_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+1.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2) / (fWeight0 + fWeight1 + fWeight2); +} + +FFXM_MIN16_F4 Lanczos2Approx(Fetched9TapSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac) +{ + FFXM_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, fPxFrac.x); + FFXM_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, fPxFrac.x); + FFXM_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, fPxFrac.x); + FFXM_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + const FfxInt32 iDeringingSampleCount = 4; + const FFXM_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0]; 
+ FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} + +#endif //FFXM_HALF + + +FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} + +#if FFXM_HALF +FFXM_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac) +{ + FFXM_MIN16_F4 fColorX0 = 
Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFXM_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFXM_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFXM_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFXM_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFXM_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} +#endif //FFXM_HALF + + + +FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +#if FFXM_HALF +FFXM_MIN16_F4 Lanczos2Approx(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, 
FFXM_MIN16_F t) +{ + FFXM_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-1.f) - t); + FFXM_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-0.f) - t); + FFXM_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+1.f) - t); + FFXM_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFXM_HALF + +FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} + +#if FFXM_HALF +FFXM_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 
fPxFrac) +{ + FFXM_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFXM_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFXM_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFXM_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFXM_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + +#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFXM_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFXM_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); +#endif + return fColorXY; +} +#endif + +// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. +FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? 
ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; +} +#if FFXM_HALF +FFXM_MIN16_I2 ClampCoord(FFXM_MIN16_I2 iPxSample, FFXM_MIN16_I2 iPxOffset, FFXM_MIN16_I2 iTextureSize) +{ + FFXM_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < FFXM_MIN16_I(0)) ? ffxMax(result.x, FFXM_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > FFXM_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFXM_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < FFXM_MIN16_I(0)) ? ffxMax(result.y, FFXM_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > FFXM_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFXM_MIN16_I(1)) : result.y; + return result; +} +#endif //FFXM_HALF + + +#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ + Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ + Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ + \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ + \ + Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ + Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor22 = 
TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ + \ + Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ + Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ + Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ + Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ + \ + return Samples; \ + } + +#define DeclareCustomFetch9TapSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ + Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ + \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + \ + Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ + Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + \ + return Samples; \ + } + +#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define 
DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetch9TapSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetch9TapSamplesWithType(Fetched9TapSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + return Samples; \ + } + +#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +// BE CAREFUL: there are some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) +// is common, so iPxSample can "jitter" +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = 
FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFXM_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FFXM_MIN16_F2 fPxFrac = FFXM_MIN16_F2(ffxFract(fPxSample)); \ + FFXM_MIN16_F4 fColorXY = FFXM_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define FFXM_FSR2_CONCAT_ID(x, y) x ## y +#define FFXM_FSR2_CONCAT(x, y) FFXM_FSR2_CONCAT_ID(x, y) +#define FFXM_FSR2_SAMPLER_1D_0 Lanczos2 +#define FFXM_FSR2_SAMPLER_1D_1 Lanczos2LUT +#define FFXM_FSR2_SAMPLER_1D_2 Lanczos2Approx + +#define FFXM_FSR2_GET_LANCZOS_SAMPLER1D(x) FFXM_FSR2_CONCAT(FFXM_FSR2_SAMPLER_1D_, x) + +#endif //!defined( FFXM_FSR2_SAMPLE_H ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta new file mode 100644 index 0000000..5b81ce7 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: 161ce220c1b38aa41992c3c6e1099300 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + 
isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h new file mode 100644 index 0000000..d41127d --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h @@ -0,0 +1,195 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#ifndef FFXM_FSR2_UPSAMPLE_H +#define FFXM_FSR2_UPSAMPLE_H + +#define FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP 0 +#define FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP 1 + +#if FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP +#define FFXM_FSR2_UPSAMPLE_KERNEL FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP +FFXM_STATIC const FfxInt32 iLanczos2SampleCount = 5; +#else +#define FFXM_FSR2_UPSAMPLE_KERNEL FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP +FFXM_STATIC const FfxUInt32 iLanczos2SampleCount = 16; +#endif + + +void Deringing(RectificationBox clippingBox, FFXM_PARAMETER_INOUT FfxFloat32x3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#if FFXM_HALF +void Deringing(RectificationBoxMin16 clippingBox, FFXM_PARAMETER_INOUT FFXM_MIN16_F3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#endif + +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) +{ + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + return fSampleWeight; +} + +#if FFXM_HALF +FFXM_MIN16_F GetUpsampleLanczosWeight(FFXM_MIN16_F2 fSrcSampleOffset, FFXM_MIN16_F fKernelWeight) +{ + FFXM_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; + FFXM_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + return fSampleWeight; +} +#endif + +FfxFloat32 ComputeMaxKernelWeight() { + const FfxFloat32 fKernelSizeBias = 1.0f; + + FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); + + return ffxMin(FfxFloat32(1.99f), fKernelWeight); +} + +#if FFXM_HALF +FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFXM_PARAMETER_INOUT RectificationBoxMin16 clippingBox, FfxFloat32 fReactiveFactor) +#else +FfxFloat32x4 
ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFXM_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) +#endif +{ + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally) + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFXM_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position + FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... + + FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + + FfxFloat32x2 iSrcInputUv = FfxFloat32x2(fSrcOutputPos) / FfxFloat32x2(RenderSize()); + FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(RenderSize()); + + FFXM_MIN16_F4 fColorAndWeight = FFXM_MIN16_F4(0.0f, 0.0f, 0.0f, 0.0f); + + FFXM_MIN16_F2 fBaseSampleOffset = FFXM_MIN16_F2(fSrcUnjitteredPos - fSrcOutputPos); + + // Identify how much of each upsampled color to be used for this frame + const FFXM_MIN16_F fKernelReactiveFactor = FFXM_MIN16_F(ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample))); + const FFXM_MIN16_F fKernelBiasMax = FFXM_MIN16_F(ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor)); + + const FFXM_MIN16_F fKernelBiasMin = FFXM_MIN16_F(ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f))); + const FFXM_MIN16_F fKernelBiasFactor = FFXM_MIN16_F(ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor))); + const FFXM_MIN16_F fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); + + const FFXM_MIN16_F fRectificationCurveBias = FFXM_MIN16_F(ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f))); + + FFXM_MIN16_F2 offsetTL; + offsetTL.x = FFXM_MIN16_F(-1); + offsetTL.y = FFXM_MIN16_F(-1); + + 
FFXM_MIN16_F2 fOffsetTL = offsetTL; + +#if FFXM_FSR2_UPSAMPLE_KERNEL == FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP + FFXM_MIN16_F3 fSamples[iLanczos2SampleCount]; + // Collect samples + GatherPreparedInputColorRGBQuad(FfxFloat32x2(-0.5, -0.5) * unitOffsetUv + iSrcInputUv, + fSamples[0], fSamples[1], fSamples[4], fSamples[5]); + fSamples[2] = LoadPreparedInputColor(FfxInt32x2(1, -1) + iSrcInputPos); + fSamples[6] = LoadPreparedInputColor(FfxInt32x2(1, 0) + iSrcInputPos); + fSamples[8] = LoadPreparedInputColor(FfxInt32x2(-1, 1) + iSrcInputPos); + fSamples[9] = LoadPreparedInputColor(FfxInt32x2(0, 1) + iSrcInputPos); + fSamples[10] = LoadPreparedInputColor(FfxInt32x2(1, 1) + iSrcInputPos); + + FFXM_UNROLL + for (FfxInt32 row = 0; row < 3; row++) + { + FFXM_UNROLL + for (FfxInt32 col = 0; col < 3; col++) + { + FfxInt32 iSampleIndex = col + (row << 2); + const FfxInt32x2 sampleColRow = FfxInt32x2(col, row); + const FFXM_MIN16_F2 fOffset = fOffsetTL + FFXM_MIN16_F2(sampleColRow); + FFXM_MIN16_F2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + FFXM_MIN16_F fSampleWeight = FFXM_MIN16_F(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FFXM_MIN16_F4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); + + // Update rectification box + { + const FFXM_MIN16_F fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FFXM_MIN16_F fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); + + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + } + } +#elif FFXM_FSR2_UPSAMPLE_KERNEL == FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP + + FFXM_MIN16_F3 fSamples[iLanczos2SampleCount]; + // Collect samples + FfxInt32x2 rowCol [iLanczos2SampleCount] = {FfxInt32x2(0, -1), FfxInt32x2(-1, 0), FfxInt32x2(0, 0), FfxInt32x2(1, 0), 
FfxInt32x2(0, 1)}; + fSamples[0] = LoadPreparedInputColor(rowCol[0] + iSrcInputPos); + fSamples[1] = LoadPreparedInputColor(rowCol[1] + iSrcInputPos); + fSamples[2] = LoadPreparedInputColor(rowCol[2] + iSrcInputPos); + fSamples[3] = LoadPreparedInputColor(rowCol[3] + iSrcInputPos); + fSamples[4] = LoadPreparedInputColor(rowCol[4] + iSrcInputPos); + FFXM_UNROLL + for (FfxInt32 idx = 0; idx < iLanczos2SampleCount; idx++) + { + const FfxInt32x2 sampleColRow = rowCol[idx]; + const FFXM_MIN16_F2 fOffset = FFXM_MIN16_F2(sampleColRow); + FFXM_MIN16_F2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + FFXM_MIN16_F fSampleWeight = FFXM_MIN16_F(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FFXM_MIN16_F4(fSamples[idx] * fSampleWeight, fSampleWeight); + + // Update rectification box + { + const FFXM_MIN16_F fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FFXM_MIN16_F fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); + + const FfxBoolean bInitialSample = (idx == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[idx], fBoxSampleWeight); + } + } + +#endif + + RectificationBoxComputeVarianceBoxData(clippingBox); + + fColorAndWeight.w *= FFXM_MIN16_F(fColorAndWeight.w > FSR2_EPSILON); + + if (fColorAndWeight.w > FSR2_EPSILON) { + // Normalize for deringing (we need to compare colors) + fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; + fColorAndWeight.w = FFXM_MIN16_F(fColorAndWeight.w*fUpsampleLanczosWeightScale); + + Deringing(clippingBox, fColorAndWeight.xyz); + } + return fColorAndWeight; +} + +#endif //!defined( FFXM_FSR2_UPSAMPLE_H ) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta new file mode 100644 index 0000000..00d48d1 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta @@ -0,0 +1,67 @@ +fileFormatVersion: 2 +guid: adbae71b3f272394a895f14e3c09e3e2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta new file mode 100644 index 0000000..484847f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: bc1175974e28a1344bca96b5e00fc1cf +folderAsset: yes +DefaultImporter: + externalObjects: {} + 
userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h new file mode 100644 index 0000000..73b2af0 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h @@ -0,0 +1,1013 @@ +// Copyright © 2023 Advanced Micro Devices, Inc. +// Copyright © 2024 Arm Limited. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +/// @defgroup FfxGPUSpd FidelityFX SPD +/// FidelityFX Single Pass Downsampler 2.0 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. 
z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// @param [in] mips optional: if -1, calculate based on rect width and height +/// +/// @ingroup FfxGPUSpd +#if defined(FFXM_CPU) +FFXM_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo, + FfxInt32 mips) +{ + // determines the offset of the first tile to downsample based on + // left (rectInfo[0]) and top (rectInfo[1]) of the subregion. + workGroupOffset[0] = rectInfo[0] / 64; + workGroupOffset[1] = rectInfo[1] / 64; + + FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width + FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height + + // we only need to dispatch as many thread groups as tiles we need to downsample + // number of tiles per slice depends on the subregion to downsample + dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; + dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; + + // number of thread groups per slice + numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); + + if (mips >= 0) + { + numWorkGroupsAndMips[1] = FfxUInt32(mips); + } + else + { + // calculate based on rect width and height + FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]); + numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); + } +} + +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. 
z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// +/// @ingroup FfxGPUSpd +FFXM_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo) +{ + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); +} +#endif // #if defined(FFXM_CPU) + + +//============================================================================================================================== +// NON-PACKED VERSION +//============================================================================================================================== +#if defined(FFXM_GPU) +#if defined(FFXM_SPD_PACKED_ONLY) +// Avoid compiler errors by including default implementations of these callbacks. +FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +#endif // #if FFXM_SPD_PACKED_ONLY + +//_____________________________________________________________/\_______________________________________________________________ +#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) +#extension GL_KHR_shader_subgroup_quad:require +#endif + +void ffxSpdWorkgroupShuffleBarrier() +{ + FFXM_GROUP_MEMORY_BARRIER(); +} + +// Only last 
active workgroup should proceed +bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice) +{ + // global atomic counter + if (localInvocationIndex == 0) + { + SpdIncreaseAtomicCounter(slice); + } + + ffxSpdWorkgroupShuffleBarrier(); + return (SpdGetAtomicCounter() != (numWorkGroups - 1)); +} + +// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3); +FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) +{ +#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) + + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat32x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4(v0, v1, v2, v3); + +#elif defined(FFXM_HLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) + + // requires SM6.0 + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = QuadReadAcrossX(v); + FfxFloat32x4 v2 = QuadReadAcrossY(v); + FfxFloat32x4 v3 = QuadReadAcrossDiagonal(v); + return SpdReduce4(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat32x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = 
AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat32x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4(v0, v1, v2, v3); + */ +#endif + return v; +} + +FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y); + FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y); + FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y); + FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, 
FfxUInt32 slice) +{ +#if defined(SPD_LINEAR_SAMPLER) + return SpdLoadSourceImage(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + v[0] = SpdReduceQuad(v[0]); + v[1] = SpdReduceQuad(v[1]); + v[2] = SpdReduceQuad(v[2]); + v[3] = SpdReduceQuad(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediate(x / 2, y / 2, v[0]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediate(x / 2, 
y / 2 + 8, v[2]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + for (FfxUInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediate(x, y, v[i]); + ffxSpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + ffxSpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediate(x + 0, y + 0, v[0]); + SpdStoreIntermediate(x + 8, y + 0, v[1]); + SpdStoreIntermediate(x + 0, y + 8, v[2]); + SpdStoreIntermediate(x + 8, y + 8, v[3]); + } +} + +void 
SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); +#else + SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); +#endif +} + + +void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... 
+ // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat32x4 v = SpdLoadIntermediate(x, y); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat32x4 v = + SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... + SpdStoreIntermediate(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... 
+ // 0 x 0 0 0 x 0 0 + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediate(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat32x4 
v2 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v3, 6, slice); + + if (mips <= 7) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3); + SpdStore(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediate(x, y, v); +} + +void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +/// Downsamples a 64x64 tile based on the work group id. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. 
+/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + // compute MIP level 0 and 1 + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); + + // compute MIP level 2, 3, 4, 5 + SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips <= 6) + return; + + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + // reset the global atomic counter back to 0 for the next spd dispatch + SpdResetAtomicCounter(slice); + + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // compute MIP level 6 and 7 + SpdDownsampleMips_6_7(x, y, mips, slice); + + // compute MIP level 8, 9, 10, 11 + SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. 
+/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. +/// +/// @ingroup FfxGPUSpd +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//============================================================================================================================== +// PACKED VERSION +//============================================================================================================================== + +#if FFXM_HALF + +#if defined(FFXM_GLSL) +#extension GL_EXT_shader_subgroup_extended_types_float16:require +#endif + +FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) +{ +#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat16x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4H(v0, v1, v2, v3); +#elif defined(FFXM_HLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) + // requires SM6.0 + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = QuadReadAcrossX(v); + FfxFloat16x4 v2 = QuadReadAcrossY(v); + 
FfxFloat16x4 v3 = QuadReadAcrossDiagonal(v); + return SpdReduce4H(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat16x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat16x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4H(v0, v1, v2, v3); + */ +#endif + return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat16x4 v0 = SpdLoadIntermediateH(i0.x, i0.y); + FfxFloat16x4 v1 = SpdLoadIntermediateH(i1.x, i1.y); + FfxFloat16x4 v2 = SpdLoadIntermediateH(i2.x, i2.y); + FfxFloat16x4 v3 = SpdLoadIntermediateH(i3.x, i3.y); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 
i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice) +{ +#if defined(SPD_LINEAR_SAMPLER) + return SpdLoadSourceImageH(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, 
slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + v[0] = SpdReduceQuadH(v[0]); + v[1] = SpdReduceQuadH(v[1]); + v[2] = SpdReduceQuadH(v[2]); + v[3] = SpdReduceQuadH(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2, v[0]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = 
FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + for (FfxInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediateH(x, y, v[i]); + ffxSpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + ffxSpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediateH(x + 0, y + 0, v[0]); + SpdStoreIntermediateH(x + 8, y + 0, v[1]); + SpdStoreIntermediateH(x + 0, y + 8, v[2]); + SpdStoreIntermediateH(x + 8, y + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); +#else + SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); +#endif +} + + +void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try 
to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat16x4 v = SpdLoadIntermediateH(x, y); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat16x4 v = + SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... 
+ SpdStoreIntermediateH(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... + // 0 x 0 0 0 x 0 0 + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediateH(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFXM_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... 
+ FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v3, 6, slice); + + if (mips < 8) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3); + SpdStoreH(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediateH(x, y, v); +} + +void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + 
ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + + // compute MIP level 0 and 1 + SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); + + // compute MIP level 2, 3, 4, 5 + SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips < 7) + return; + + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + // reset the global atomic counter back to 0 for the next spd dispatch + SpdResetAtomicCounter(slice); + + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. 
+ // compute MIP level 6 and 7 + SpdDownsampleMips_6_7H(x, y, mips, slice); + + // compute MIP level 8, 9, 10, 11 + SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} + +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. +/// +/// @ingroup FfxGPUSpd +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +#endif // #if FFXM_HALF +#endif // #if defined(FFXM_GPU) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta new file mode 100644 index 0000000..a2617bf --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta @@ -0,0 +1,76 @@ +fileFormatVersion: 2 +guid: 8e7a668559e3ae0419884ca7d3534a47 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + 
validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Android: 1 + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Android: Android + second: + enabled: 0 + settings: + AndroidSharedLibraryType: Executable + CPU: ARMv7 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + CPU: AnyCPU + DefaultValueInitialized: true + OS: AnyOS + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: From c27195fb9a40d84ff7cbbe89645b033df488b7e1 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 19:51:57 +0100 Subject: [PATCH 30/88] Defined a vertex/fragment shader for the accumulate pass, with initial modifications to make things compile as a proof of concept: - Removed vk::binding directives, as they are only meant for Vulkan and don't mean anything to Unity - Initialize inout struct as zero to stop Unity compiler complaints - Renamed vertex main function to VertMain to prevent duplicate main function declaration - Removed duplicate VertexOut struct declaration - Removed unused header includes in vertex shader code, preventing duplicate declarations - Fixed up a few relative header include paths --- .../Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 23 +++++ .../ASR/Shaders/ffxm_fsr2_fs.shader.meta | 9 ++ .../shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 5 - .../ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl | 5 +- .../shaders/fsr2/ffxm_fsr2_accumulate.h | 2 + 
.../shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 98 +++++++++---------- 6 files changed, 84 insertions(+), 58 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader new file mode 100644 index 0000000..615bdae --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -0,0 +1,23 @@ +Shader "TND/ASR/ffx_fsr2_fs" +{ + SubShader + { + Cull Off ZWrite Off ZTest Always + + Pass + { + Name "Accumulate" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + #pragma enable_d3d11_debug_symbols + + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl" + + ENDHLSL + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta new file mode 100644 index 0000000..256f8a3 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 147cc2cffac69ef4eb3ea8addafc9d10 +ShaderImporter: + externalObjects: {} + defaultTextures: [] + nonModifiableTextures: [] + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 581eecf..7f78bbb 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -59,11 +59,6 @@ #include "fsr2/ffxm_fsr2_reproject.h" #include "fsr2/ffxm_fsr2_accumulate.h" -struct VertexOut -{ - float4 position : SV_POSITION; -}; - struct AccumulateOutputsFS { #if !FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl index d657150..edb6a70 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl @@ -32,15 +32,12 @@ #define FSR2_BIND_CB_FSR2 0 -#include "fsr2/ffxm_fsr2_callbacks_hlsl.h" -#include "fsr2/ffxm_fsr2_common.h" - struct VertexOut { float4 position : SV_POSITION; }; -VertexOut main(uint uVertexId : SV_VERTEXID) +VertexOut VertMain(uint uVertexId : SV_VERTEXID) { VertexOut output; float2 uv = float2(uVertexId & 1, uVertexId >> 1) * 2.0; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h index 3cd15ae..cb2ab5f 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -244,6 +244,8 @@ FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams para void initReactiveMaskFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) { + params = (AccumulationPassCommonParams)0; + const FFXM_MIN16_F2 fDilatedReactiveMasks = FFXM_MIN16_F2(SampleDilatedReactiveMasks(params.fLrUv_HwSampler)); params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; params.fAccumulationMask = fDilatedReactiveMasks.y; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index 7a0ba61..649fb78 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -19,14 +19,14 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-#include "./fsr2/ffxm_fsr2_resources.h" +#include "./ffxm_fsr2_resources.h" #if defined(FFXM_GPU) #ifdef __hlsl_dx_compiler #pragma dxc diagnostic push #pragma dxc diagnostic ignored "-Wambig-lit-shift" #endif //__hlsl_dx_compiler -#include "./ffxm_core.h" +#include "../ffxm_core.h" #ifdef __hlsl_dx_compiler #pragma dxc diagnostic pop #endif //__hlsl_dx_compiler @@ -57,7 +57,7 @@ #endif #if defined(FSR2_BIND_CB_FSR2) - [[vk::binding(FSR2_BIND_CB_FSR2 + SET_0_CB_START, 0)]] cbuffer cbFSR2 : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) + cbuffer cbFSR2 : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) { FfxInt32x2 iRenderSize; FfxInt32x2 iMaxRenderSize; @@ -225,7 +225,7 @@ FfxFloat32 ViewSpaceToMetersFactor() #endif // #if FFXM_FSR2_EMBED_ROOTSIG #if defined(FSR2_BIND_CB_RCAS) -[[vk::binding(FSR2_BIND_CB_RCAS + SET_0_CB_START, 0)]] cbuffer cbRCAS : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) +cbuffer cbRCAS : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) { FfxUInt32x4 rcasConfig; }; @@ -238,7 +238,7 @@ FfxUInt32x4 RCASConfig() #if defined(FSR2_BIND_CB_REACTIVE) -[[vk::binding(FSR2_BIND_CB_REACTIVE + SET_0_CB_START, 0)]] cbuffer cbGenerateReactive : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) { FfxFloat32 gen_reactive_scale; FfxFloat32 gen_reactive_threshold; @@ -268,7 +268,7 @@ FfxUInt32 GenReactiveFlags() #endif // #if defined(FSR2_BIND_CB_REACTIVE) #if defined(FSR2_BIND_CB_SPD) -[[vk::binding(FSR2_BIND_CB_SPD + SET_0_CB_START, 0)]] cbuffer cbSPD : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { +cbuffer cbSPD : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { FfxUInt32 mips; FfxUInt32 numWorkGroups; @@ -297,136 +297,136 @@ FfxUInt32x2 SPD_RenderSize() } #endif // #if defined(FSR2_BIND_CB_SPD) -[[vk::binding(0, 0)]] SamplerState s_PointClamp : register(s0); -[[vk::binding(1, 0)]] SamplerState s_LinearClamp : register(s1); +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); // SRVs #if defined 
FSR2_BIND_SRV_INPUT_COLOR - [[vk::binding(FSR2_BIND_SRV_INPUT_COLOR, 1)]] Texture2D r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + Texture2D r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY - [[vk::binding(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY, 1)]] Texture2D r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); + Texture2D r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); #endif #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS - [[vk::binding(FSR2_BIND_SRV_INPUT_MOTION_VECTORS, 1)]] Texture2D r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + Texture2D r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_INPUT_DEPTH - [[vk::binding(FSR2_BIND_SRV_INPUT_DEPTH, 1)]] Texture2D r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); + Texture2D r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); #endif #if defined FSR2_BIND_SRV_INPUT_EXPOSURE - [[vk::binding(FSR2_BIND_SRV_INPUT_EXPOSURE, 1)]] Texture2D r_input_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); + Texture2D r_input_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); #endif #if defined FSR2_BIND_SRV_AUTO_EXPOSURE - [[vk::binding(FSR2_BIND_SRV_AUTO_EXPOSURE, 1)]] Texture2D r_auto_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); + Texture2D r_auto_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_SRV_REACTIVE_MASK - [[vk::binding(FSR2_BIND_SRV_REACTIVE_MASK, 1)]] Texture2D r_reactive_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + Texture2D r_reactive_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); #endif #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - [[vk::binding(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK, 1)]] 
Texture2D r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); #endif #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH - [[vk::binding(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH, 1)]] Texture2D r_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + Texture2D r_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS - [[vk::binding(FSR2_BIND_SRV_DILATED_MOTION_VECTORS, 1)]] Texture2D r_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + Texture2D r_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS - [[vk::binding(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS, 1)]] Texture2D r_previous_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); + Texture2D r_previous_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_DILATED_DEPTH - [[vk::binding(FSR2_BIND_SRV_DILATED_DEPTH, 1)]] Texture2D r_dilatedDepth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + Texture2D r_dilatedDepth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); #endif #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED - [[vk::binding(FSR2_BIND_SRV_INTERNAL_UPSCALED, 1)]] Texture2D r_internal_upscaled_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + Texture2D r_internal_upscaled_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_SRV_LOCK_STATUS - [[vk::binding(FSR2_BIND_SRV_LOCK_STATUS, 1)]] Texture2D r_lock_status : 
FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + Texture2D r_lock_status : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); #endif #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA - [[vk::binding(FSR2_BIND_SRV_LOCK_INPUT_LUMA, 1)]] Texture2D r_lock_input_luma : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); + Texture2D r_lock_input_luma : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); #endif #if defined FSR2_BIND_SRV_NEW_LOCKS - [[vk::binding(FSR2_BIND_SRV_NEW_LOCKS, 1)]] Texture2D r_new_locks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); + Texture2D r_new_locks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); #endif #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR - [[vk::binding(FSR2_BIND_SRV_PREPARED_INPUT_COLOR, 1)]] Texture2D r_prepared_input_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + Texture2D r_prepared_input_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_LUMA_HISTORY - [[vk::binding(FSR2_BIND_SRV_LUMA_HISTORY, 1)]] Texture2D r_luma_history : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + Texture2D r_luma_history : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); #endif #if defined FSR2_BIND_SRV_RCAS_INPUT - [[vk::binding(FSR2_BIND_SRV_RCAS_INPUT, 1)]] Texture2D r_rcas_input : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + Texture2D r_rcas_input : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); #endif #if defined FSR2_BIND_SRV_LANCZOS_LUT - [[vk::binding(FSR2_BIND_SRV_LANCZOS_LUT, 1)]] Texture2D r_lanczos_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + Texture2D r_lanczos_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); #endif #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS - [[vk::binding(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS, 1)]] Texture2D r_imgMips : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); + Texture2D r_imgMips : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); #endif #if defined 
FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT - [[vk::binding(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT, 1)]] Texture2D r_upsample_maximum_bias_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + Texture2D r_upsample_maximum_bias_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); #endif #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS - [[vk::binding(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS, 1)]] Texture2D r_dilated_reactive_masks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); + Texture2D r_dilated_reactive_masks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); #endif #if defined FSR2_BIND_SRV_TEMPORAL_REACTIVE - [[vk::binding(FSR2_BIND_SRV_TEMPORAL_REACTIVE, 1)]] Texture2D r_internal_temporal_reactive : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TEMPORAL_REACTIVE); + Texture2D r_internal_temporal_reactive : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TEMPORAL_REACTIVE); #endif // UAV declarations #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - [[vk::binding(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, 1)]] RWTexture2D rw_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + RWTexture2D rw_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - [[vk::binding(FSR2_BIND_UAV_DILATED_MOTION_VECTORS, 1)]] RWTexture2D rw_dilated_motion_vectors : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + RWTexture2D rw_dilated_motion_vectors : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - [[vk::binding(FSR2_BIND_UAV_DILATED_DEPTH, 1)]] RWTexture2D rw_dilatedDepth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + RWTexture2D rw_dilatedDepth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - 
[[vk::binding(FSR2_BIND_UAV_INTERNAL_UPSCALED, 1)]] RWTexture2D rw_internal_upscaled_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + RWTexture2D rw_internal_upscaled_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - [[vk::binding(FSR2_BIND_UAV_LOCK_STATUS, 1)]] RWTexture2D rw_lock_status : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + RWTexture2D rw_lock_status : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); #endif #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA - [[vk::binding(FSR2_BIND_UAV_LOCK_INPUT_LUMA, 1)]] RWTexture2D rw_lock_input_luma : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); + RWTexture2D rw_lock_input_luma : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); #endif #if defined FSR2_BIND_UAV_NEW_LOCKS - [[vk::binding(FSR2_BIND_UAV_NEW_LOCKS, 1)]] RWTexture2D rw_new_locks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); + RWTexture2D rw_new_locks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - [[vk::binding(FSR2_BIND_UAV_PREPARED_INPUT_COLOR, 1)]] RWTexture2D rw_prepared_input_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + RWTexture2D rw_prepared_input_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - [[vk::binding(FSR2_BIND_UAV_LUMA_HISTORY, 1)]] RWTexture2D rw_luma_history : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + RWTexture2D rw_luma_history : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - [[vk::binding(FSR2_BIND_UAV_UPSCALED_OUTPUT, 1)]] RWTexture2D rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + RWTexture2D rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - [[vk::binding(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, 1)]] globallycoherent 
RWTexture2D rw_img_mip_shading_change : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - [[vk::binding(FSR2_BIND_UAV_EXPOSURE_MIP_5, 1)]] globallycoherent RWTexture2D rw_img_mip_5 : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + globallycoherent RWTexture2D rw_img_mip_5 : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - [[vk::binding(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, 1)]] RWTexture2D rw_dilated_reactive_masks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); + RWTexture2D rw_dilated_reactive_masks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); #endif #if defined FSR2_BIND_UAV_EXPOSURE - [[vk::binding(FSR2_BIND_UAV_EXPOSURE, 1)]] RWTexture2D rw_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + RWTexture2D rw_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); #endif #if defined FSR2_BIND_UAV_AUTO_EXPOSURE - [[vk::binding(FSR2_BIND_UAV_AUTO_EXPOSURE, 1)]] RWTexture2D rw_auto_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); + RWTexture2D rw_auto_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC - [[vk::binding(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, 1)]] globallycoherent RWTexture2D rw_spd_global_atomic : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + globallycoherent RWTexture2D rw_spd_global_atomic : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); #endif #if defined FSR2_BIND_UAV_AUTOREACTIVE - [[vk::binding(FSR2_BIND_UAV_AUTOREACTIVE, 1)]] RWTexture2D rw_output_autoreactive : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); + RWTexture2D rw_output_autoreactive : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); #endif #if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) From 
1551a3ccf803b8505a21cd133e60e5218b87fcc9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 20:04:23 +0100 Subject: [PATCH 31/88] Added the remaining vertex/fragment shader passes and made the same modifications to get them to compile --- .../ASR/Shaders/ffxm_fsr2_common.cginc | 1 + .../ASR/Shaders/ffxm_fsr2_common.cginc.meta | 3 + .../Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 73 ++++++++++++++++++- .../ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 5 -- .../shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 6 -- .../shaders/ffxm_fsr2_rcas_pass_fs.hlsl | 5 -- ...r2_reconstruct_previous_depth_pass_fs.hlsl | 5 -- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h | 4 +- 8 files changed, 77 insertions(+), 25 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc new file mode 100644 index 0000000..5f28270 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta new file mode 100644 index 0000000..2bfbafa --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 6836bc2e151d44e0bd1991fd7bfaee30 
+timeCreated: 1742410813 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader index 615bdae..4e1e847 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -4,7 +4,58 @@ Shader "TND/ASR/ffx_fsr2_fs" { Cull Off ZWrite Off ZTest Always - Pass + Pass // 0 + { + Name "Auto-Generate Reactive Mask" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 1 + { + Name "Reconstruct Previous Depth" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 2 + { + Name "Depth Clip" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 3 { Name "Accumulate" @@ -12,12 +63,30 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma enable_d3d11_debug_symbols + //#pragma enable_d3d11_debug_symbols + #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl" ENDHLSL } 
+ + Pass // 4 + { + Name "Sharpen" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_rcas_pass_fs.hlsl" + + ENDHLSL + } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl index 1f1472f..124c73c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -44,11 +44,6 @@ struct GenReactiveMaskOutputs FfxFloat32 fReactiveMask : SV_TARGET0; }; -struct VertexOut -{ - float4 position : SV_POSITION; -}; - GenReactiveMaskOutputs main(float4 SvPosition : SV_POSITION) { uint2 uPixelCoord = uint2(SvPosition.xy); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index bd3723a..de93855 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -48,12 +48,6 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_depth_clip.h" -struct VertexOut -{ - float4 position : SV_POSITION; -}; - - struct DepthClipOutputsFS { FfxFloat32x2 fDilatedReactiveMasks 
: SV_TARGET0; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl index ad49951..95d6c61 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl @@ -40,11 +40,6 @@ #include "fsr2/ffxm_fsr2_common.h" #include "fsr2/ffxm_fsr2_rcas.h" -struct VertexOut -{ - float4 position : SV_POSITION; -}; - struct RCASOutputsFS { FfxFloat32x3 fUpscaledColor : SV_TARGET0; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index ef0a1b8..5a71c68 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -44,11 +44,6 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" -struct VertexOut -{ - float4 position : SV_POSITION; -}; - struct ReconstructPrevDepthOutputsFS { FfxFloat32 fDepth : SV_TARGET0; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h index 
d60784b..a0c5e5f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h @@ -22,7 +22,7 @@ #define GROUP_SIZE 8 #define FSR_RCAS_DENOISE 1 -#include "./ffxm_core.h" +#include "../ffxm_core.h" struct RCASOutputs { @@ -61,7 +61,7 @@ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} #endif -#include "./fsr1/ffxm_fsr1.h" +#include "../fsr1/ffxm_fsr1.h" void CurrFilter(FFXM_MIN16_U2 pos, FFXM_PARAMETER_INOUT RCASOutputs results) { From d77922cc4b2ae25e17814d3786a9b733e8200e01 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 20:17:46 +0100 Subject: [PATCH 32/88] Backported fixes that we already know are going to be necessary for full Unity support: - Removed empty parentheses on GroupMemoryBarrier macro, which confuse the 2020.1 shader compiler - Added padding field to cbFSR2 to make it exactly 128 bytes in size, which gives correct buffer alignment on iOS Metal - Changed auto-exposure reset threshold value to 1e4f, as part of a fix for black screen flashes in OpenGL Core on Nvidia GPUs - Clamp luma to >= 0 in auto-exposure to fix artifacting in OpenGL Core on Nvidia GPUs - Removed #extension directives meant for GLSL, which cause shader compiler warnings in Unity --- .../Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h | 2 +- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 2 ++ .../Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h | 2 +- .../shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h | 1 + .../Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h | 6 +++--- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h index 9696c28..e77bb1d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h @@ -32,7 +32,7 @@ /// A define for abstracting compute memory barriers between shading languages. /// /// @ingroup HLSLCore -#define FFXM_GROUP_MEMORY_BARRIER() GroupMemoryBarrierWithGroupSync() +#define FFXM_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync() /// A define for abstracting compute atomic additions between shading languages. /// diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index 649fb78..c277f7d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -79,6 +79,8 @@ FfxFloat32 fDeltaTime; FfxFloat32 fDynamicResChangeFactor; FfxFloat32 fViewSpaceToMetersFactor; + + FfxFloat32 fPadding; }; #define FFXM_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. 
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h index 4a13e6f..662ba2a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h @@ -57,7 +57,7 @@ FFXM_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLa FFXM_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; // Auto exposure -FFXM_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +FFXM_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e4f; // Optimizations defines #ifndef FFXM_OPT_USE_GATHER_OPS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h index ffef258..52ad315 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h @@ -82,6 +82,7 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 { FfxFloat32 rate = 1.0f; result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + result = ffxMax(0.0f, result); } FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); SPD_SetExposureBuffer(spdOutput); diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h index 73b2af0..c9322ce 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h @@ -119,12 +119,12 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl //_____________________________________________________________/\_______________________________________________________________ #if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS) -#extension GL_KHR_shader_subgroup_quad:require +//#extension GL_KHR_shader_subgroup_quad:require #endif void ffxSpdWorkgroupShuffleBarrier() { - FFXM_GROUP_MEMORY_BARRIER(); + FFXM_GROUP_MEMORY_BARRIER; } // Only last active workgroup should proceed @@ -578,7 +578,7 @@ void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxU #if FFXM_HALF #if defined(FFXM_GLSL) -#extension GL_EXT_shader_subgroup_extended_types_float16:require +//#extension GL_EXT_shader_subgroup_extended_types_float16:require #endif FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) From a85e242d4c3695f398ea58b25eef4643b224f7f9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 21:49:42 +0100 Subject: [PATCH 33/88] Added shaders for the two compute passes, plus a few minor fixes --- .../Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 5 ++++- ...fxm_fsr2_compute_luminance_pyramid_pass.compute | 14 ++++++++++++++ ...sr2_compute_luminance_pyramid_pass.compute.meta | 3 +++ .../ASR/Shaders/ffxm_fsr2_lock_pass.compute | 12 ++++++++++++ .../ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta | 3 +++ .../fsr2/ffxm_fsr2_compute_luminance_pyramid.h | 2 +- 6 files changed, 37 insertions(+), 2 
deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 5f28270..11f4edd 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -1 +1,4 @@ - \ No newline at end of file +#pragma warning(disable: 3078) // Loop control variable conflicts +#pragma warning(disable: 3203) // Signed/unsigned mismatch +#pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data +#pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute new file mode 100644 index 0000000..cab6cf5 --- /dev/null +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute @@ -0,0 +1,14 @@ +#pragma kernel main + +#pragma multi_compile_local __ FFXM_HALF +#pragma multi_compile_local __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFXM_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + +#include "ffxm_fsr2_common.cginc" + +#define FFXM_SPD_NO_WAVE_OPERATIONS + +#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta new file mode 100644 index 0000000..9989db6 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 57220d870cb441c8a6df8a9e15a74283 +timeCreated: 1742416757 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute new file mode 100644 index 0000000..9e3a2a4 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute @@ -0,0 +1,12 @@ +#pragma kernel main + +#pragma multi_compile_local __ FFXM_HALF +#pragma multi_compile_local __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local 
__ FFXM_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + +#include "ffxm_fsr2_common.cginc" + +#include "shaders/ffxm_fsr2_lock_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta new file mode 100644 index 0000000..1b473ab --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: a6e1d5d5372d467790fcf2d089b50ef7 +timeCreated: 1742417134 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h index 52ad315..eb14e4b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h @@ -188,7 +188,7 @@ FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxF } #endif -#include "./spd/ffxm_spd.h" +#include "../spd/ffxm_spd.h" void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) { From c8eebd63ed3a234b3c91c161f72356e7915573a5 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 19 Mar 2025 22:04:40 +0100 Subject: [PATCH 34/88] Added multi-compile keywords for all fragment shaders, and added Metal workaround for texture atomics --- .../ASR/Shaders/ffxm_fsr2_common.cginc | 7 +++++ .../Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 
26 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 11f4edd..4d8ce66 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -2,3 +2,10 @@ #pragma warning(disable: 3203) // Signed/unsigned mismatch #pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible + +// Work around the lack of texture atomics on Metal +#if defined(SHADER_API_METAL) +#define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); } +#define InterlockedMin(dest, val) { (dest) = min((dest), (val)); } +#define InterlockedMax(dest, val) { (dest) = max((dest), (val)); } +#endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader index 4e1e847..9066d1e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -13,6 +13,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma fragment main #pragma target 4.5 //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" @@ -31,6 +34,13 @@ Shader 
"TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" #include "shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl" @@ -48,6 +58,12 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" #include "shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl" @@ -65,6 +81,14 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE + #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_APPLY_SHARPENING + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl" @@ -82,6 +106,8 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + #include "ffxm_fsr2_common.cginc" #include "shaders/ffxm_fsr2_vs.hlsl" #include 
"shaders/ffxm_fsr2_rcas_pass_fs.hlsl" From d3f60e2650e5018374f35f86140e97fe9ce637f9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Thu, 20 Mar 2025 22:10:54 +0100 Subject: [PATCH 35/88] Added ASR runtime source files, as a copy of the FSR2 sources, with a bunch of renaming and stripped of some parts that we know we're not going to need. --- .../Effects/Upscaling/ASR/Runtime.meta | 8 + .../Effects/Upscaling/ASR/Runtime/Asr.cs | 292 +++++++++ .../Effects/Upscaling/ASR/Runtime/Asr.cs.meta | 11 + .../Upscaling/ASR/Runtime/AsrAssets.cs | 152 +++++ .../Upscaling/ASR/Runtime/AsrAssets.cs.meta | 11 + .../Upscaling/ASR/Runtime/AsrCallbacks.cs | 81 +++ .../ASR/Runtime/AsrCallbacks.cs.meta | 11 + .../Upscaling/ASR/Runtime/AsrContext.cs | 570 ++++++++++++++++++ .../Upscaling/ASR/Runtime/AsrContext.cs.meta | 11 + .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 339 +++++++++++ .../Upscaling/ASR/Runtime/AsrPass.cs.meta | 11 + .../Upscaling/ASR/Runtime/AsrResources.cs | 227 +++++++ .../ASR/Runtime/AsrResources.cs.meta | 11 + .../Upscaling/ASR/Runtime/AsrShaderIDs.cs | 75 +++ .../ASR/Runtime/AsrShaderIDs.cs.meta | 11 + .../Upscaling/ASR/Runtime/ResourceView.cs | 55 ++ .../ASR/Runtime/ResourceView.cs.meta | 11 + 17 files changed, 1887 insertions(+) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs 
create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta new file mode 100644 index 0000000..3e15b23 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 6bbfbdd9fd482bd4ea5e998953ae9972 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs new file mode 100644 index 0000000..a75fab0 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -0,0 +1,292 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; + +namespace ArmASR +{ + /// + /// A collection of helper functions and data structures required by the ASR process. + /// + public static class Asr + { + /// + /// Creates a new ASR context with standard parameters that are appropriate for the current platform. 
+ /// + public static AsrContext CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaders shaders, InitializationFlags flags = 0) + { + if (SystemInfo.usesReversedZBuffer) + flags |= InitializationFlags.EnableDepthInverted; + else + flags &= ~InitializationFlags.EnableDepthInverted; + +#if UNITY_EDITOR || DEVELOPMENT_BUILD + flags |= InitializationFlags.EnableDebugChecking; +#endif + + Debug.Log($"Setting up Arm ASR with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); + + var contextDescription = new ContextDescription + { + Flags = flags, + DisplaySize = displaySize, + MaxRenderSize = maxRenderSize, + Shaders = shaders, + }; + + var context = new AsrContext(); + context.Create(contextDescription); + return context; + } + + public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode) + { + switch (qualityMode) + { + case QualityMode.NativeAA: + return 1.0f; + case QualityMode.UltraQuality: + return 1.2f; + case QualityMode.Quality: + return 1.5f; + case QualityMode.Balanced: + return 1.7f; + case QualityMode.Performance: + return 2.0f; + case QualityMode.UltraPerformance: + return 3.0f; + default: + return 1.0f; + } + } + + public static void GetRenderResolutionFromQualityMode( + out int renderWidth, out int renderHeight, + int displayWidth, int displayHeight, QualityMode qualityMode) + { + float ratio = GetUpscaleRatioFromQualityMode(qualityMode); + renderWidth = Mathf.RoundToInt(displayWidth / ratio); + renderHeight = Mathf.RoundToInt(displayHeight / ratio); + } + + public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) + { + return Mathf.Log((float)renderWidth / displayWidth, 2.0f) - 1.0f; + } + + public static int GetJitterPhaseCount(int renderWidth, int displayWidth) + { + const float basePhaseCount = 8.0f; + int jitterPhaseCount = (int)(basePhaseCount * Mathf.Pow((float)displayWidth / renderWidth, 2.0f)); + return jitterPhaseCount; + } + + 
public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount)
+        {
+            // Sample the Halton (2, 3) sequence at the 1-based position inside the current jitter cycle,
+            // then shift the result by -0.5 so the offset is centred on zero.
+            outX = Halton((index % phaseCount) + 1, 2) - 0.5f;
+            outY = Halton((index % phaseCount) + 1, 3) - 0.5f;
+        }
+
+        // Calculate halton number for index and base.
+        // Returns 0 when index is not positive, because the loop body never runs.
+        private static float Halton(int index, int @base)
+        {
+            float f = 1.0f, result = 0.0f;
+
+            for (int currentIndex = index; currentIndex > 0;) {
+
+                f /= @base;
+                result += f * (currentIndex % @base);
+                // Integer division floors exactly for positive operands; a float divide loses precision for large indices.
+                currentIndex /= @base;
+            }
+
+            return result;
+        }
+
+        // Product of sin(pi*x)/(pi*x) and sin(pi*x/2)/(pi*x/2); returns 1 when x is (almost) zero to avoid 0/0.
+        public static float Lanczos2(float value)
+        {
+            return Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value));
+        }
+
+#if !UNITY_2021_1_OR_NEWER
+        // Shim for Unity versions older than 2021.1: exposes the SetBufferData name by forwarding to SetComputeBufferData.
+        // CommandBuffer is fully qualified because this file does not import UnityEngine.Rendering.
+        internal static void SetBufferData(this UnityEngine.Rendering.CommandBuffer commandBuffer, ComputeBuffer computeBuffer, Array data)
+        {
+            commandBuffer.SetComputeBufferData(computeBuffer, data);
+        }
+#endif
+
+        public enum QualityMode
+        {
+            NativeAA = 0,
+            UltraQuality = 1,
+            Quality = 2,
+            Balanced = 3,
+            Performance = 4,
+            UltraPerformance = 5,
+        }
+
+        [Flags]
+        public enum InitializationFlags
+        {
+            EnableHighDynamicRange = 1 << 0,
+            EnableDisplayResolutionMotionVectors = 1 << 1,
+            EnableMotionVectorsJitterCancellation = 1 << 2,
+            EnableDepthInverted = 1 << 3,
+            EnableDepthInfinite = 1 << 4,
+            EnableAutoExposure = 1 << 5,
+            EnableDynamicResolution = 1 << 6,
+            EnableFP16Usage = 1 << 7,
+            EnableDebugChecking = 1 << 8,
+        }
+
+        ///
+        /// A structure encapsulating the parameters required to initialize Arm ASR upscaling.
+        ///
+        public struct ContextDescription
+        {
+            public InitializationFlags Flags;
+            public Vector2Int MaxRenderSize;
+            public Vector2Int DisplaySize;
+            public AsrShaders Shaders;
+        }
+
+        ///
+        /// A structure encapsulating the parameters for dispatching the various passes of Arm ASR.
+ /// + public class DispatchDescription + { + public ResourceView Color; + public ResourceView Depth; + public ResourceView MotionVectors; + public ResourceView Exposure; // optional + public ResourceView Reactive; // optional + public ResourceView TransparencyAndComposition; // optional + public ResourceView Output; + public Vector2 JitterOffset; + public Vector2 MotionVectorScale; + public Vector2Int RenderSize; + public Vector2Int InputResourceSize; + public bool EnableSharpening; + public float Sharpness; + public float FrameTimeDelta; // in seconds + public float PreExposure; + public bool Reset; + public float CameraNear; + public float CameraFar; + public float CameraFovAngleVertical; + public float ViewSpaceToMetersFactor; + public bool UseTextureArrays; // Enable texture array bindings, primarily used for HDRP and XR + } + + /// + /// A structure encapsulating the parameters for automatic generation of a reactive mask. + /// + public class GenerateReactiveDescription + { + public ResourceView ColorOpaqueOnly; + public ResourceView ColorPreUpscale; + public ResourceView OutReactive; + public Vector2Int RenderSize; + public float Scale = 0.5f; + public float CutoffThreshold = 0.2f; + public float BinaryValue = 0.9f; + public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax; + } + + [Flags] + public enum GenerateReactiveFlags + { + ApplyTonemap = 1 << 0, + ApplyInverseTonemap = 1 << 1, + ApplyThreshold = 1 << 2, + UseComponentsMax = 1 << 3, + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct UpscalerConstants + { + public Vector2Int renderSize; + public Vector2Int maxRenderSize; + public Vector2Int displaySize; + public Vector2Int inputColorResourceDimensions; + public Vector2Int lumaMipDimensions; + public int lumaMipLevelToUse; + public int frameIndex; + + public Vector4 deviceToViewDepth; + public Vector2 jitterOffset; + public Vector2 
motionVectorScale; + public Vector2 downscaleFactor; + public Vector2 motionVectorJitterCancellation; + public float preExposure; + public float previousFramePreExposure; + public float tanHalfFOV; + public float jitterPhaseCount; + public float deltaTime; + public float dynamicResChangeFactor; + public float viewSpaceToMetersFactor; + public float padding; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct SpdConstants + { + public uint mips; + public uint numWorkGroups; + public uint workGroupOffsetX, workGroupOffsetY; + public uint renderSizeX, renderSizeY; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants + { + public float scale; + public float threshold; + public float binaryValue; + public uint flags; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants2 + { + public float autoTcThreshold; + public float autoTcScale; + public float autoReactiveScale; + public float autoReactiveMax; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct RcasConstants + { + public RcasConstants(uint sharpness, uint halfSharp) + { + this.sharpness = sharpness; + this.halfSharp = halfSharp; + dummy0 = dummy1 = 0; + } + + public readonly uint sharpness; + public readonly uint halfSharp; + public readonly uint dummy0; + public readonly uint dummy1; + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta new file mode 100644 index 0000000..e36c358 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: c7350363c6d8a2b4096a9ed97dc4ed95 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + 
icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs new file mode 100644 index 0000000..95ff401 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -0,0 +1,152 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace ArmASR +{ + /// + /// Scriptable object containing all shader resources required by Arm Accuracy Super Resolution (ASR). + /// These can be stored in an asset file and referenced from a scene or prefab, avoiding the need to load the shaders from a Resources folder. 
+    ///
+    [CreateAssetMenu(fileName = "ASR Assets", menuName = "ARM/ASR Assets", order = 1102)]
+    public class AsrAssets : ScriptableObject
+    {
+        public AsrShaders shaders;
+
+#if UNITY_EDITOR
+        // Editor-only: fills every shader slot by looking the compute shader up by asset name.
+        // NOTE(review): some names use the "ffxm_" prefix and others "ffx_" - confirm each one matches an ASR shader
+        // asset, since FindComputeShader returns the first search hit for the given name.
+        private void Reset()
+        {
+            shaders = new AsrShaders
+            {
+                computeLuminancePyramidPass = FindComputeShader("ffxm_fsr2_compute_luminance_pyramid_pass"),
+                reconstructPreviousDepthPass = FindComputeShader("ffx_fsr2_reconstruct_previous_depth_pass"),
+                depthClipPass = FindComputeShader("ffx_fsr2_depth_clip_pass"),
+                lockPass = FindComputeShader("ffxm_fsr2_lock_pass"),
+                accumulatePass = FindComputeShader("ffx_fsr2_accumulate_pass"),
+                sharpenPass = FindComputeShader("ffx_fsr2_rcas_pass"),
+                autoGenReactivePass = FindComputeShader("ffx_fsr2_autogen_reactive_pass"),
+                tcrAutoGenPass = FindComputeShader("ffx_fsr2_tcr_autogen_pass"),
+            };
+        }
+
+        // Searches the asset database for a ComputeShader matching the name and loads the first hit.
+        // Returns null when the search yields nothing.
+        private static ComputeShader FindComputeShader(string name)
+        {
+            string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:ComputeShader {name}");
+            if (assetGuids == null || assetGuids.Length == 0)
+                return null;
+
+            string assetPath = UnityEditor.AssetDatabase.GUIDToAssetPath(assetGuids[0]);
+            // The typed overload is required: the untyped one returns UnityEngine.Object, which does not convert implicitly.
+            return UnityEditor.AssetDatabase.LoadAssetAtPath<ComputeShader>(assetPath);
+        }
+#endif
+    }
+
+    ///
+    /// All the compute shaders used by ASR.
+    ///
+    [System.Serializable]
+    public class AsrShaders
+    {
+        ///
+        /// The compute shader used by the luminance pyramid computation pass.
+        ///
+        public ComputeShader computeLuminancePyramidPass;
+
+        ///
+        /// The compute shader used by the previous depth reconstruction pass.
+        ///
+        public ComputeShader reconstructPreviousDepthPass;
+
+        ///
+        /// The compute shader used by the depth clip pass.
+        ///
+        public ComputeShader depthClipPass;
+
+        ///
+        /// The compute shader used by the lock pass.
+        ///
+        public ComputeShader lockPass;
+
+        ///
+        /// The compute shader used by the accumulation pass.
+        ///
+        public ComputeShader accumulatePass;
+
+        ///
+        /// The compute shader used by the RCAS sharpening pass.
+        ///
+        public ComputeShader sharpenPass;
+
+        ///
+        /// The compute shader used to auto-generate a reactive mask.
+        ///
+        public ComputeShader autoGenReactivePass;
+
+        ///
+        /// The compute shader used to auto-generate a transparency and composition mask.
+        ///
+        public ComputeShader tcrAutoGenPass;
+
+        ///
+        /// Returns a copy of this class and its contents.
+        /// The copy is shallow: it references the same shader objects as this instance.
+        ///
+        public AsrShaders Clone()
+        {
+            return (AsrShaders)MemberwiseClone();
+        }
+
+        ///
+        /// Returns a copy of this class with clones of all its shaders.
+        /// This can be useful if you're running multiple ASR instances with different shader configurations.
+        /// Be sure to clean up these clones through Dispose once you're done with them.
+        /// Shader slots that are not assigned stay null in the copy.
+        ///
+        public AsrShaders DeepCopy()
+        {
+            return new AsrShaders
+            {
+                computeLuminancePyramidPass = CloneShader(computeLuminancePyramidPass),
+                reconstructPreviousDepthPass = CloneShader(reconstructPreviousDepthPass),
+                depthClipPass = CloneShader(depthClipPass),
+                lockPass = CloneShader(lockPass),
+                accumulatePass = CloneShader(accumulatePass),
+                sharpenPass = CloneShader(sharpenPass),
+                autoGenReactivePass = CloneShader(autoGenReactivePass),
+                tcrAutoGenPass = CloneShader(tcrAutoGenPass),
+            };
+        }
+
+        // Object.Instantiate throws when handed a null original, so unassigned slots are passed through as null.
+        private static ComputeShader CloneShader(ComputeShader shader)
+        {
+            return shader != null ? Object.Instantiate(shader) : null;
+        }
+
+        ///
+        /// Destroy all the shaders within this instance.
+        /// Use this only on clones created through DeepCopy.
+ /// + public void Dispose() + { + Object.Destroy(computeLuminancePyramidPass); + Object.Destroy(reconstructPreviousDepthPass); + Object.Destroy(depthClipPass); + Object.Destroy(lockPass); + Object.Destroy(accumulatePass); + Object.Destroy(sharpenPass); + Object.Destroy(autoGenReactivePass); + Object.Destroy(tcrAutoGenPass); + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta new file mode 100644 index 0000000..022b8ab --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 7a41695239eb36740847744b34c5af43 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs new file mode 100644 index 0000000..50fe7a6 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs @@ -0,0 +1,81 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall 
be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace ArmASR +{ + /// + /// A collection of callbacks required by the ASR process. + /// This allows some customization by the game dev on how to integrate ASR upscaling into their own game setup. + /// + public interface IAsrCallbacks + { + /// + /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers. + /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures. + /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays. + /// + /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. + /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand. + /// + void ApplyMipmapBias(float biasOffset); + + void UndoMipmapBias(); + } + + /// + /// Default implementation of IAsrCallbacks. + /// These are fine for testing but a proper game will want to extend and override these methods. 
+    ///
+    public class AsrCallbacksBase: IAsrCallbacks
+    {
+        // Running total of every bias offset applied so far; UndoMipmapBias subtracts exactly this amount.
+        protected float CurrentBiasOffset = 0;
+
+        public virtual void ApplyMipmapBias(float biasOffset)
+        {
+            // A non-finite offset would be added to every texture's bias, so ignore it.
+            if (float.IsNaN(biasOffset) || float.IsInfinity(biasOffset))
+                return;
+
+            CurrentBiasOffset += biasOffset;
+
+            // Snap to exactly zero so the equality check in UndoMipmapBias is not defeated by rounding residue.
+            if (Mathf.Approximately(CurrentBiasOffset, 0f))
+            {
+                CurrentBiasOffset = 0f;
+            }
+
+            // The type argument is required: FindObjectsOfTypeAll has no parameterless non-generic overload.
+            foreach (var texture in Resources.FindObjectsOfTypeAll<Texture2D>())
+            {
+                // Textures with a single mip level are skipped.
+                if (texture.mipmapCount <= 1)
+                    continue;
+
+                texture.mipMapBias += biasOffset;
+            }
+        }
+
+        public virtual void UndoMipmapBias()
+        {
+            if (CurrentBiasOffset == 0f)
+                return;
+
+            // Applying the negated total brings CurrentBiasOffset back to zero.
+            ApplyMipmapBias(-CurrentBiasOffset);
+        }
+    }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta
new file mode 100644
index 0000000..2724280
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 78f16fcb80e6325429dfa567a4ed5d4a
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs
new file mode 100644
index 0000000..245a87b
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs
@@ -0,0 +1,570 @@
+// Copyright (c) 2024 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction,
including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Rendering; + +namespace ArmASR +{ + /// + /// This class loosely matches the FfxFsr2Context struct from the original FSR2 codebase. + /// It manages the various resources and compute passes required by the ASR process. + /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers. + /// This should make it suitable for integration with any of the available Unity render pipelines. 
+    ///
+    public class AsrContext
+    {
+        // _resourceFrameIndex wraps around at this value.
+        private const int MaxQueuedFrames = 16;
+
+        private Asr.ContextDescription _contextDescription;
+        private CommandBuffer _commandBuffer;
+
+        private AsrPass _computeLuminancePyramidPass;
+        private AsrPass _reconstructPreviousDepthPass;
+        private AsrPass _depthClipPass;
+        private AsrPass _lockPass;
+        private AsrPass _accumulatePass;
+        private AsrPass _sharpenPass;
+        private AsrPass _generateReactivePass;
+        private AsrPass _tcrAutogeneratePass;
+
+        private readonly AsrResources _resources = new AsrResources();
+
+        // Each constants block lives in a one-element array that is uploaded with SetBufferData,
+        // and is edited in place through the matching ref property.
+        private ComputeBuffer _upscalerConstantsBuffer;
+        private readonly Asr.UpscalerConstants[] _upscalerConstantsArray = { new Asr.UpscalerConstants() };
+        private ref Asr.UpscalerConstants UpscalerConsts => ref _upscalerConstantsArray[0];
+
+        private ComputeBuffer _spdConstantsBuffer;
+        private readonly Asr.SpdConstants[] _spdConstantsArray = { new Asr.SpdConstants() };
+        private ref Asr.SpdConstants SpdConsts => ref _spdConstantsArray[0];
+
+        private ComputeBuffer _rcasConstantsBuffer;
+        private readonly Asr.RcasConstants[] _rcasConstantsArray = new Asr.RcasConstants[1];
+        private ref Asr.RcasConstants RcasConsts => ref _rcasConstantsArray[0];
+
+        private ComputeBuffer _generateReactiveConstantsBuffer;
+        private readonly Asr.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Asr.GenerateReactiveConstants() };
+        private ref Asr.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0];
+
+        private bool _firstExecution;
+        private Vector2 _previousJitterOffset;
+        private int _resourceFrameIndex;
+
+        public void Create(Asr.ContextDescription contextDescription)
+        {
+            _contextDescription = contextDescription;
+            _commandBuffer = new CommandBuffer { name = "Arm ASR" };
+
+            // One constant buffer per constants struct; the type argument matches the element type of the array uploaded into it.
+            // NOTE(review): CreateConstantBuffer is defined outside this excerpt - confirm it is the generic helper assumed here.
+            _upscalerConstantsBuffer = CreateConstantBuffer<Asr.UpscalerConstants>();
+            _spdConstantsBuffer = CreateConstantBuffer<Asr.SpdConstants>();
+            _rcasConstantsBuffer = CreateConstantBuffer<Asr.RcasConstants>();
+            _generateReactiveConstantsBuffer = CreateConstantBuffer<Asr.GenerateReactiveConstants>();
+
+            //
Set defaults + _firstExecution = true; + _resourceFrameIndex = 0; + + UpscalerConsts.displaySize = _contextDescription.DisplaySize; + + _resources.Create(_contextDescription); + CreatePasses(); + } + + private void CreatePasses() + { + _computeLuminancePyramidPass = new AsrComputeLuminancePyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer); + _reconstructPreviousDepthPass = new AsrReconstructPreviousDepthPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _depthClipPass = new AsrDepthClipPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _lockPass = new AsrLockPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _accumulatePass = new AsrAccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer); + _sharpenPass = new AsrSharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer); + _generateReactivePass = new AsrGenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer); + } + + public void Destroy() + { + DestroyPass(ref _tcrAutogeneratePass); + DestroyPass(ref _generateReactivePass); + DestroyPass(ref _sharpenPass); + DestroyPass(ref _accumulatePass); + DestroyPass(ref _lockPass); + DestroyPass(ref _depthClipPass); + DestroyPass(ref _reconstructPreviousDepthPass); + DestroyPass(ref _computeLuminancePyramidPass); + + _resources.Destroy(); + + DestroyConstantBuffer(ref _generateReactiveConstantsBuffer); + DestroyConstantBuffer(ref _rcasConstantsBuffer); + DestroyConstantBuffer(ref _spdConstantsBuffer); + DestroyConstantBuffer(ref _upscalerConstantsBuffer); + + if (_commandBuffer != null) + { + _commandBuffer.Dispose(); + _commandBuffer = null; + } + } + + public void Dispatch(Asr.DispatchDescription dispatchParams) + { + _commandBuffer.Clear(); + Dispatch(dispatchParams, _commandBuffer); + Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void Dispatch(Asr.DispatchDescription dispatchParams, 
CommandBuffer commandBuffer) + { + if ((_contextDescription.Flags & Asr.InitializationFlags.EnableDebugChecking) != 0) + { + DebugCheckDispatch(dispatchParams); + } + + if (dispatchParams.UseTextureArrays) + commandBuffer.EnableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); + + if (_firstExecution) + { + commandBuffer.SetRenderTarget(_resources.LockStatus[0]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(_resources.LockStatus[1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + int frameIndex = _resourceFrameIndex % 2; + bool resetAccumulation = dispatchParams.Reset || _firstExecution; + _firstExecution = false; + + // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends + if ((_contextDescription.Flags & Asr.InitializationFlags.EnableAutoExposure) != 0) + dispatchParams.Exposure = new ResourceView(_resources.AutoExposure); + else if (!dispatchParams.Exposure.IsValid) + dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure); + + if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive); + if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive); + AsrResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); + + SetupConstants(dispatchParams, resetAccumulation); + + // Reactive mask bias + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstX = (_contextDescription.DisplaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstY = (_contextDescription.DisplaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + // 
Clear reconstructed depth for max depth store + if (resetAccumulation) + { + commandBuffer.SetRenderTarget(_resources.LockStatus[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + commandBuffer.SetRenderTarget(_resources.SceneLuminance); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + // Auto exposure always used to track luma changes in locking logic + commandBuffer.SetRenderTarget(_resources.AutoExposure); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + + // Reset atomic counter to 0 + commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option + bool depthInverted = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) == Asr.InitializationFlags.EnableDepthInverted; + commandBuffer.SetRenderTarget(AsrShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.ClearRenderTarget(false, true, depthInverted ? 
Color.clear : Color.white);
+
+            // Auto exposure
+            SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount);
+
+            // Initialize constant buffers data
+            commandBuffer.SetBufferData(_upscalerConstantsBuffer, _upscalerConstantsArray);
+            commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray);
+
+            // Compute luminance pyramid
+            _computeLuminancePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y);
+
+            // Reconstruct previous depth
+            _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Depth clip
+            _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Create locks
+            _lockPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Accumulate
+            _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY);
+
+            if (dispatchParams.EnableSharpening)
+            {
+                // Compute the constants
+                SetupRcasConstants(dispatchParams);
+                commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray);
+
+                // Dispatch RCAS
+                // Size the dispatch from the context's display size (the same basis as the accumulate pass),
+                // not from Screen.width/height, which need not match the size this context was created for.
+                const int threadGroupWorkRegionDimRcas = 16;
+                int threadGroupsX = (_contextDescription.DisplaySize.x + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas;
+                int threadGroupsY = (_contextDescription.DisplaySize.y + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas;
+                _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY);
+            }
+
+            _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames;
+
+            AsrResources.DestroyAliasableResources(commandBuffer);
+
+            commandBuffer.DisableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY");
+        }
+
+        public void GenerateReactiveMask(Asr.GenerateReactiveDescription dispatchParams)
+        {
+            _commandBuffer.Clear();
+            GenerateReactiveMask(dispatchParams, _commandBuffer);
+            
Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void GenerateReactiveMask(Asr.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) + { + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + GenReactiveConsts.scale = dispatchParams.Scale; + GenReactiveConsts.threshold = dispatchParams.CutoffThreshold; + GenReactiveConsts.binaryValue = dispatchParams.BinaryValue; + GenReactiveConsts.flags = (uint)dispatchParams.Flags; + commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray); + + ((AsrGenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams, dispatchSrcX, dispatchSrcY); + } + + private void SetupConstants(Asr.DispatchDescription dispatchParams, bool resetAccumulation) + { + ref Asr.UpscalerConstants constants = ref UpscalerConsts; + + constants.jitterOffset = dispatchParams.JitterOffset; + constants.renderSize = dispatchParams.RenderSize; + constants.maxRenderSize = _contextDescription.MaxRenderSize; + constants.inputColorResourceDimensions = dispatchParams.InputResourceSize; + + // Compute the horizontal FOV for the shader from the vertical one + float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; + float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f; + constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f); + constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? 
dispatchParams.ViewSpaceToMetersFactor : 1.0f; + + // Compute params to enable device depth to view space depth computation in shader + constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams); + + // To be updated if resource is larger than the actual image size + constants.downscaleFactor = new Vector2((float)constants.renderSize.x / _contextDescription.DisplaySize.x, (float)constants.renderSize.y / _contextDescription.DisplaySize.y); + constants.previousFramePreExposure = constants.preExposure; + constants.preExposure = (dispatchParams.PreExposure != 0) ? dispatchParams.PreExposure : 1.0f; + + // Motion vector data + Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.displaySize : constants.renderSize; + constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize; + + // Compute jitter cancellation + if ((_contextDescription.Flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) + { + constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize; + _previousJitterOffset = constants.jitterOffset; + } + + int jitterPhaseCount = Asr.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.DisplaySize.x); + if (resetAccumulation || constants.jitterPhaseCount == 0) + { + constants.jitterPhaseCount = jitterPhaseCount; + } + else + { + int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount); + if (jitterPhaseCountDelta > 0) + constants.jitterPhaseCount++; + else if (jitterPhaseCountDelta < 0) + constants.jitterPhaseCount--; + } + + // Convert delta time to seconds and clamp to [0, 1] + constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta); + + if (resetAccumulation) + constants.frameIndex = 0; + else + constants.frameIndex++; + + // Shading change usage of the SPD mip levels + 
/// <summary>
/// Builds the four constants the shaders use to convert a device (depth-buffer) value into a view-space position.
/// </summary>
/// <param name="dispatchParams">Per-frame dispatch parameters; reads CameraNear, CameraFar, RenderSize and CameraFovAngleVertical.</param>
/// <returns>
/// x: negated projection term "c", y: projection term "e", z: aspect / cot(fovY / 2), w: 1 / cot(fovY / 2).
/// Which c/e pair is picked depends on the EnableDepthInverted and EnableDepthInfinite context flags.
/// </returns>
private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Asr.DispatchDescription dispatchParams)
{
    // Machine epsilon for 32-bit floats (the value of FLT_EPSILON in C).
    // Mathf.Epsilon must NOT be used here: it is the smallest positive float, so "-1.0f - Mathf.Epsilon"
    // rounds back to exactly -1.0f and "0.0f + Mathf.Epsilon" is a denormal that may be flushed to zero,
    // which removes the bias that keeps the infinite-depth terms away from a division by zero.
    const float FltEpsilon = 1.192092896e-07f;

    bool inverted = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) != 0;
    bool infinite = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInfinite) != 0;

    // make sure it has no impact if near and far plane values are swapped in dispatch params
    // the flags "inverted" and "infinite" will decide what transform to use
    float min = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar);
    float max = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar);

    if (inverted)
    {
        (min, max) = (max, min);
    }

    float q = max / (min - max);
    float d = -1.0f;

    // Indexed by matrixIndex below: [0] regular, [1] infinite, [2] inverted, [3] inverted + infinite
    Vector4 matrixElemC = new Vector4(q, -1.0f - FltEpsilon, q, 0.0f + FltEpsilon);
    Vector4 matrixElemE = new Vector4(q * min, -min - FltEpsilon, q * min, max);

    // Revert x and y coords
    float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y;
    float cotHalfFovY = Mathf.Cos(0.5f * dispatchParams.CameraFovAngleVertical) / Mathf.Sin(0.5f * dispatchParams.CameraFovAngleVertical);

    int matrixIndex = (inverted ? 2 : 0) + (infinite ? 1 : 0);
    return new Vector4(
        d * matrixElemC[matrixIndex],
        matrixElemE[matrixIndex],
        aspect / cotHalfFovY,
        1.0f / cotHalfFovY);
}

/// <summary>
/// Selects the precomputed RCAS constants matching the requested sharpness.
/// Sharpness is clamped to [0, 1] and mapped linearly onto the entries of the RcasConfigs table.
/// </summary>
private void SetupRcasConstants(Asr.DispatchDescription dispatchParams)
{
    int sharpnessIndex = Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1));
    RcasConsts = RcasConfigs[sharpnessIndex];
}

/// <summary>
/// Fills the SPD (downsampling) constants for a rectangle covering the full render size.
/// </summary>
/// <param name="dispatchParams">Per-frame dispatch parameters; only RenderSize is read.</param>
/// <param name="dispatchThreadGroupCount">Receives the number of thread groups to dispatch in X and Y.</param>
private void SetupSpdConstants(Asr.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount)
{
    RectInt rectInfo = new RectInt(0, 0, dispatchParams.RenderSize.x, dispatchParams.RenderSize.y);
    SpdSetup(rectInfo, out dispatchThreadGroupCount, out var workGroupOffset, out var numWorkGroupsAndMips);

    // Downsample
    ref Asr.SpdConstants spdConstants = ref SpdConsts;
    spdConstants.numWorkGroups = (uint)numWorkGroupsAndMips.x;
    spdConstants.mips = (uint)numWorkGroupsAndMips.y;
    spdConstants.workGroupOffsetX = (uint)workGroupOffset.x;
    spdConstants.workGroupOffsetY = (uint)workGroupOffset.y;
    spdConstants.renderSizeX = (uint)dispatchParams.RenderSize.x;
    spdConstants.renderSizeY = (uint)dispatchParams.RenderSize.y;
}

/// <summary>
/// Computes the dispatch layout for a downsampling pass over the given rectangle, in tiles of 64x64 pixels.
/// </summary>
/// <param name="rectInfo">Region of the source image to process.</param>
/// <param name="dispatchThreadGroupCount">Receives the number of 64x64 tiles touched by the rectangle in X and Y.</param>
/// <param name="workGroupOffset">Receives the index of the first tile touched by the rectangle.</param>
/// <param name="numWorkGroupsAndMips">Receives the total tile count in x and the mip count in y.</param>
/// <param name="mips">Mip count to use; a negative value derives it from the rectangle size, capped at 12.</param>
private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1)
{
    workGroupOffset = new Vector2Int(rectInfo.x / 64, rectInfo.y / 64);

    int endIndexX = (rectInfo.x + rectInfo.width - 1) / 64;
    int endIndexY = (rectInfo.y + rectInfo.height - 1) / 64;

    dispatchThreadGroupCount = new Vector2Int(endIndexX + 1 - workGroupOffset.x, endIndexY + 1 - workGroupOffset.y);

    numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mips);
    if (mips < 0)
    {
        // Derive the mip count from the largest dimension: floor(log2(size)), at most 12
        float resolution = Math.Max(rectInfo.width, rectInfo.height);
        numWorkGroupsAndMips.y = Math.Min(Mathf.FloorToInt(Mathf.Log(resolution, 2.0f)), 12);
    }
}
/// <summary>
/// Validates the dispatch parameters against the context description and logs every problem found
/// to the Unity console. This is purely diagnostic: no state is modified and nothing is thrown.
/// </summary>
/// <param name="dispatchParams">The dispatch description about to be executed.</param>
private void DebugCheckDispatch(Asr.DispatchDescription dispatchParams)
{
    var flags = _contextDescription.Flags;
    var maxRenderSize = _contextDescription.MaxRenderSize;

    // --- Mandatory input and output resources ---
    if (!dispatchParams.Color.IsValid)
    {
        Debug.LogError("Color resource is null");
    }

    if (!dispatchParams.Depth.IsValid)
    {
        Debug.LogError("Depth resource is null");
    }

    if (!dispatchParams.MotionVectors.IsValid)
    {
        Debug.LogError("MotionVectors resource is null");
    }

    if (!dispatchParams.Output.IsValid)
    {
        Debug.LogError("Output resource is null");
    }

    // --- Exposure: a user-supplied texture conflicts with automatic exposure ---
    bool autoExposure = (flags & Asr.InitializationFlags.EnableAutoExposure) != 0;
    if (dispatchParams.Exposure.IsValid && autoExposure)
    {
        Debug.LogWarning("Exposure resource provided, however auto exposure flag is present");
    }

    // --- Jitter, motion vector scale and render size ---
    var jitter = dispatchParams.JitterOffset;
    if (Mathf.Abs(jitter.x) > 1.0f || Mathf.Abs(jitter.y) > 1.0f)
    {
        Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]");
    }

    var mvScale = dispatchParams.MotionVectorScale;
    if (mvScale.x > maxRenderSize.x || mvScale.y > maxRenderSize.y)
    {
        Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize");
    }

    if (mvScale.x == 0.0f || mvScale.y == 0.0f)
    {
        Debug.LogWarning("MotionVectorScale contains zero scale value");
    }

    var renderSize = dispatchParams.RenderSize;
    if (renderSize.x > maxRenderSize.x || renderSize.y > maxRenderSize.y)
    {
        Debug.LogWarning("RenderSize is greater than context MaxRenderSize");
    }

    if (renderSize.x == 0 || renderSize.y == 0)
    {
        Debug.LogWarning("RenderSize contains zero dimension");
    }

    // --- Timing and exposure scalars ---
    if (dispatchParams.FrameTimeDelta > 1.0f)
    {
        Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)");
    }

    if (dispatchParams.PreExposure == 0.0f)
    {
        Debug.LogError("PreExposure provided as 0.0f which is invalid");
    }

    // --- Camera planes: expectations depend on the depth conventions of the context ---
    bool infiniteDepth = (flags & Asr.InitializationFlags.EnableDepthInfinite) != 0;
    bool inverseDepth = (flags & Asr.InitializationFlags.EnableDepthInverted) != 0;
    var near = dispatchParams.CameraNear;
    var far = dispatchParams.CameraFar;

    if (inverseDepth)
    {
        if (near < far)
        {
            Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar");
        }

        if (infiniteDepth && near < float.MaxValue)
        {
            Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue");
        }

        if (far < 0.075f)
        {
            Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting");
        }
    }
    else
    {
        if (near > far)
        {
            Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context");
        }

        if (infiniteDepth && far < float.MaxValue)
        {
            Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue");
        }

        if (near < 0.075f)
        {
            Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting");
        }
    }

    // --- Vertical field of view: must lie in (0, PI] ---
    var fovY = dispatchParams.CameraFovAngleVertical;
    if (fovY <= 0.0f)
    {
        Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f");
    }

    if (fovY > Mathf.PI)
    {
        Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI");
    }
}
/// <summary>
/// The ASR C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader.
/// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values.
/// The table is indexed linearly by sharpness: the first entry is used for sharpness 0 and the last entry for sharpness 1.
/// </summary>
private static readonly Asr.RcasConstants[] RcasConfigs = new []
{
    new Asr.RcasConstants(1048576000u, 872428544u),
    new Asr.RcasConstants(1049178080u, 877212745u),
    new Asr.RcasConstants(1049823372u, 882390168u),
    new Asr.RcasConstants(1050514979u, 887895276u),
    new Asr.RcasConstants(1051256227u, 893859143u),
    new Asr.RcasConstants(1052050675u, 900216232u),
    new Asr.RcasConstants(1052902144u, 907032080u),
    new Asr.RcasConstants(1053814727u, 914306687u),
    new Asr.RcasConstants(1054792807u, 922105590u),
    new Asr.RcasConstants(1055841087u, 930494326u),
    new Asr.RcasConstants(1056964608u, 939538432u),
    new Asr.RcasConstants(1057566688u, 944322633u),
    new Asr.RcasConstants(1058211980u, 949500056u),
    new Asr.RcasConstants(1058903587u, 955005164u),
    new Asr.RcasConstants(1059644835u, 960969031u),
    new Asr.RcasConstants(1060439283u, 967326120u),
    new Asr.RcasConstants(1061290752u, 974141968u),
    new Asr.RcasConstants(1062203335u, 981416575u),
    new Asr.RcasConstants(1063181415u, 989215478u),
    new Asr.RcasConstants(1064229695u, 997604214u),
    new Asr.RcasConstants(1065353216u, 1006648320u),
};

/// <summary>
/// Creates a constant buffer holding exactly one element of the given constants struct.
/// </summary>
/// <typeparam name="TConstants">Struct type describing the constant buffer layout; its marshalled size becomes the buffer stride.</typeparam>
/// <returns>A new ComputeBuffer of type Constant. The caller owns it and must release it (see DestroyConstantBuffer).</returns>
private static ComputeBuffer CreateConstantBuffer<TConstants>() where TConstants: struct
{
    return new ComputeBuffer(1, Marshal.SizeOf<TConstants>(), ComputeBufferType.Constant);
}

/// <summary>
/// Releases the given buffer and clears the reference. Does nothing if the reference is already null,
/// so it is safe to call more than once.
/// </summary>
private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef)
{
    if (bufferRef == null)
        return;

    bufferRef.Release();
    bufferRef = null;
}

/// <summary>
/// Disposes the given pass and clears the reference. Does nothing if the reference is already null,
/// so it is safe to call more than once.
/// </summary>
private static void DestroyPass(ref AsrPass pass)
{
    if (pass == null)
        return;

    pass.Dispose();
    pass = null;
}
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: c348b7c44539db74994c5846caec5871 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs new file mode 100644 index 0000000..5a76b16 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -0,0 +1,339 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
using System;
using UnityEngine;
using UnityEngine.Profiling;
using UnityEngine.Rendering;

namespace ArmASR
{
    /// <summary>
    /// Base class for all the compute passes that make up the ASR process.
    /// This loosely matches the FfxPipelineState struct from the original ASR codebase, wrapped in an object-oriented blanket.
    /// These classes are responsible for loading compute shaders, managing temporary resources, binding resources to shader kernels and dispatching said shaders.
    /// </summary>
    internal abstract class AsrPass: IDisposable
    {
        internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define

        protected readonly Asr.ContextDescription ContextDescription;
        protected readonly AsrResources Resources;
        protected readonly ComputeBuffer Constants;

        protected ComputeShader ComputeShader;
        protected int KernelIndex;

        protected CustomSampler Sampler;

        /// <summary>
        /// Stores the shared context state. Derived constructors are expected to call
        /// <see cref="InitComputeShader(string, ComputeShader)"/> afterwards to load their shader.
        /// </summary>
        /// <param name="contextDescription">Description of the owning context (flags, shaders, sizes).</param>
        /// <param name="resources">Persistent resources shared between the passes.</param>
        /// <param name="constants">Main constant buffer; may be null for passes that do not bind it.</param>
        protected AsrPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
        {
            ContextDescription = contextDescription;
            Resources = resources;
            Constants = constants;
        }

        /// <summary>
        /// Releases resources owned by the pass. The base implementation owns nothing and does nothing.
        /// </summary>
        public virtual void Dispose()
        {
        }

        /// <summary>
        /// Records this pass into the command buffer, wrapped in a profiler sample named after the pass.
        /// </summary>
        /// <param name="commandBuffer">Command buffer to record into.</param>
        /// <param name="dispatchParams">Per-frame dispatch parameters.</param>
        /// <param name="frameIndex">Index (0 or 1) selecting the current half of the double-buffered resources.</param>
        /// <param name="dispatchX">Number of thread groups in X.</param>
        /// <param name="dispatchY">Number of thread groups in Y.</param>
        public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            commandBuffer.BeginSample(Sampler);
            DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY);
            commandBuffer.EndSample(Sampler);
        }

        /// <summary>
        /// Binds the resources of the concrete pass and records its dispatch.
        /// </summary>
        protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY);

        /// <summary>
        /// Loads the compute shader for this pass using the initialization flags of the context.
        /// </summary>
        /// <param name="passName">Human-readable pass name, used for the profiler sample and in error messages.</param>
        /// <param name="shader">Compute shader asset implementing the pass.</param>
        /// <exception cref="MissingReferenceException">Thrown when <paramref name="shader"/> is null.</exception>
        protected void InitComputeShader(string passName, ComputeShader shader)
        {
            InitComputeShader(passName, shader, ContextDescription.Flags);
        }

        private void InitComputeShader(string passName, ComputeShader shader, Asr.InitializationFlags flags)
        {
            if (shader == null)
            {
                throw new MissingReferenceException($"Shader for ASR pass '{passName}' could not be loaded! Please ensure it is included in the project correctly.");
            }

            ComputeShader = shader;
            KernelIndex = ComputeShader.FindKernel("main");
            Sampler = CustomSampler.Create(passName);

            bool useLut = false;
#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+
            if (SystemInfo.computeSubGroupSize == 64)
            {
                useLut = true;
            }
#endif

            // This matches the permutation rules from the CreatePipeline* functions.
            // Keyword state lives on the ComputeShader asset itself, so every keyword is explicitly switched on OR off.
            // Only enabling them would leave keywords from a previous context (created with different flags) active.
            SetKeyword("FFXM_FSR2_OPTION_HDR_COLOR_INPUT", (flags & Asr.InitializationFlags.EnableHighDynamicRange) != 0);
            SetKeyword("FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS", (flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0);
            SetKeyword("FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS", (flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0);
            SetKeyword("FFXM_FSR2_OPTION_INVERTED_DEPTH", (flags & Asr.InitializationFlags.EnableDepthInverted) != 0);
            SetKeyword("FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE", useLut);
            SetKeyword("FFXM_HALF", (flags & Asr.InitializationFlags.EnableFP16Usage) != 0);
        }

        // Enables or disables a single keyword on this pass's compute shader.
        private void SetKeyword(string keyword, bool enabled)
        {
            if (enabled)
                ComputeShader.EnableKeyword(keyword);
            else
                ComputeShader.DisableKeyword(keyword);
        }
    }

    /// <summary>
    /// Pass that reads the input color and writes the scene luminance mips, the auto-exposure value
    /// and the SPD atomic counter. Binds the SPD constant buffer in addition to the main one.
    /// </summary>
    internal class AsrComputeLuminancePyramidPass : AsrPass
    {
        private readonly ComputeBuffer _spdConstants;

        public AsrComputeLuminancePyramidPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer spdConstants)
            : base(contextDescription, resources, constants)
        {
            _spdConstants = spdConstants;

            InitComputeShader("Compute Luminance Pyramid", contextDescription.Shaders.computeLuminancePyramidPass);
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            ref var color = ref dispatchParams.Color;
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement);

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoExposure, Resources.AutoExposure);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbSpd, _spdConstants, 0, _spdConstants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that reads color, depth, motion vectors and exposure and writes the dilated motion vectors
    /// for the current frame index.
    /// </summary>
    internal class AsrReconstructPreviousDepthPass : AsrPass
    {
        public AsrReconstructPreviousDepthPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
            : base(contextDescription, resources, constants)
        {
            InitComputeShader("Reconstruct & Dilate", contextDescription.Shaders.reconstructPreviousDepthPass);
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            ref var color = ref dispatchParams.Color;
            ref var depth = ref dispatchParams.Depth;
            ref var motionVectors = ref dispatchParams.MotionVectors;
            ref var exposure = ref dispatchParams.Exposure;

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement);

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that reads the application inputs (color, depth, motion vectors, exposure, reactive and
    /// transparency-and-composition masks) together with the reconstructed depth, dilated depth and
    /// the current and previous dilated motion vectors.
    /// </summary>
    internal class AsrDepthClipPass : AsrPass
    {
        public AsrDepthClipPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
            : base(contextDescription, resources, constants)
        {
            InitComputeShader("Depth Clip", contextDescription.Shaders.depthClipPass);
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            ref var color = ref dispatchParams.Color;
            ref var depth = ref dispatchParams.Depth;
            ref var motionVectors = ref dispatchParams.MotionVectors;
            ref var exposure = ref dispatchParams.Exposure;
            ref var reactive = ref dispatchParams.Reactive;
            ref var tac = ref dispatchParams.TransparencyAndComposition;

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, tac.MipLevel, tac.SubElement);

            // The temporary render textures are addressed through their UAV shader IDs
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that reads the lock input luma texture; it binds no other resources besides the main constants.
    /// </summary>
    internal class AsrLockPass : AsrPass
    {
        public AsrLockPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
            : base(contextDescription, resources, constants)
        {
            InitComputeShader("Create Locks", contextDescription.Shaders.lockPass);
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLockInputLuma, AsrShaderIDs.UavLockInputLuma);
            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that reads the previous frame's history (index <c>frameIndex ^ 1</c>) and writes the current
    /// frame's internal upscaled color, lock status and luma history, as well as the final output.
    /// The sharpening keyword is toggled per dispatch based on <c>EnableSharpening</c>.
    /// </summary>
    internal class AsrAccumulatePass : AsrPass
    {
        private const string SharpeningKeyword = "FFXM_FSR2_OPTION_APPLY_SHARPENING";

#if UNITY_2021_2_OR_NEWER
        private readonly LocalKeyword _sharpeningKeyword;
#endif

        public AsrAccumulatePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
            : base(contextDescription, resources, constants)
        {
            InitComputeShader("Reproject & Accumulate", contextDescription.Shaders.accumulatePass);
#if UNITY_2021_2_OR_NEWER
            _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword);
#endif
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
#if UNITY_2021_2_OR_NEWER
            if (dispatchParams.EnableSharpening)
                commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword);
            else
                commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword);
#else
            // Older Unity versions: the keyword is toggled through the command buffer's global shader keyword API
            if (dispatchParams.EnableSharpening)
                commandBuffer.EnableShaderKeyword(SharpeningKeyword);
            else
                commandBuffer.DisableShaderKeyword(SharpeningKeyword);
#endif

            // Low-resolution motion vectors come from the dilation pass, display-resolution ones straight from the application
            if ((ContextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0)
            {
                commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
            }
            else
            {
                ref var motionVectors = ref dispatchParams.MotionVectors;
                commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement);
            }

            ref var exposure = ref dispatchParams.Exposure;
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement);

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedReactiveMasks, AsrShaderIDs.UavDilatedReactiveMasks);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvPreparedInputColor, AsrShaderIDs.UavPreparedInputColor);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLanczosLut, Resources.LanczosLut);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]);

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]);

            ref var output = ref dispatchParams.Output;
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that reads the current frame's internal upscaled color and writes the final output.
    /// Binds the RCAS constant buffer in addition to the main one.
    /// </summary>
    internal class AsrSharpenPass : AsrPass
    {
        private readonly ComputeBuffer _rcasConstants;

        public AsrSharpenPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants)
            : base(contextDescription, resources, constants)
        {
            _rcasConstants = rcasConstants;

            InitComputeShader("RCAS Sharpening", contextDescription.Shaders.sharpenPass);
        }

        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
            ref var exposure = ref dispatchParams.Exposure;
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]);

            ref var output = ref dispatchParams.Output;
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbRcas, _rcasConstants, 0, _rcasConstants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);
        }
    }

    /// <summary>
    /// Pass that writes a reactive mask from an opaque-only color texture and the pre-upscale color texture.
    /// It is driven by <see cref="Asr.GenerateReactiveDescription"/> rather than the regular dispatch description,
    /// so it has its own ScheduleDispatch overload and does not use the main constant buffer.
    /// </summary>
    internal class AsrGenerateReactivePass : AsrPass
    {
        private readonly ComputeBuffer _generateReactiveConstants;

        public AsrGenerateReactivePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer generateReactiveConstants)
            : base(contextDescription, resources, null)
        {
            _generateReactiveConstants = generateReactiveConstants;

            InitComputeShader("Auto-Generate Reactive Mask", contextDescription.Shaders.autoGenReactivePass);
        }

        // Intentionally empty: scheduling this pass through the regular dispatch description records nothing
        // apart from the profiler sample. Use the overload below instead.
        protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
        {
        }

        /// <summary>
        /// Records the reactive mask generation into the command buffer, wrapped in a profiler sample.
        /// </summary>
        /// <param name="commandBuffer">Command buffer to record into.</param>
        /// <param name="dispatchParams">Input and output textures for the mask generation.</param>
        /// <param name="dispatchX">Number of thread groups in X.</param>
        /// <param name="dispatchY">Number of thread groups in Y.</param>
        public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY)
        {
            commandBuffer.BeginSample(Sampler);

            ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly;
            ref var color = ref dispatchParams.ColorPreUpscale;
            ref var reactive = ref dispatchParams.OutReactive;

            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement);
            commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoReactive, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement);

            commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride);

            commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1);

            commandBuffer.EndSample(Sampler);
        }
    }
}
assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs new file mode 100644 index 0000000..44dd65a --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -0,0 +1,227 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using UnityEngine; +using UnityEngine.Experimental.Rendering; +using UnityEngine.Rendering; + +namespace ArmASR +{ + /// + /// Helper class for bundling and managing persistent resources required by the ASR process. + /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames. 
+ /// + internal class AsrResources + { + public Texture2D DefaultExposure; + public Texture2D DefaultReactive; + public Texture2D LanczosLut; + public Texture2D MaximumBiasLut; + public RenderTexture SpdAtomicCounter; + public RenderTexture AutoExposure; + public RenderTexture SceneLuminance; + public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2]; + public readonly RenderTexture[] LockStatus = new RenderTexture[2]; + public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; + public readonly RenderTexture[] LumaHistory = new RenderTexture[2]; + + public void Create(Asr.ContextDescription contextDescription) + { + // Generate the data for the LUT + const int lanczos2LutWidth = 128; + float[] lanczos2Weights = new float[lanczos2LutWidth]; + for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex) + { + float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1); + float y = Asr.Lanczos2(x); + lanczos2Weights[currentLanczosWidthIndex] = y; + } + + float[] maximumBias = new float[MaximumBiasTextureWidth * MaximumBiasTextureHeight]; + for (int i = 0; i < maximumBias.Length; ++i) + { + maximumBias[i] = MaximumBias[i] / 2.0f; + } + + // Resource FSR2_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE + // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. 
+ LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "ASR_LanczosLutData" }; + LanczosLut.SetPixelData(lanczos2Weights, 0); + LanczosLut.Apply(); + + // Resource FSR2_MaximumUpsampleBias: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE + MaximumBiasLut = new Texture2D(MaximumBiasTextureWidth, MaximumBiasTextureHeight, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "ASR_MaximumUpsampleBias" }; + MaximumBiasLut.SetPixelData(maximumBias, 0); + MaximumBiasLut.Apply(); + + // Resource FSR2_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "ASR_DefaultExposure" }; + DefaultExposure.SetPixel(0, 0, Color.clear); + DefaultExposure.Apply(); + + // Resource FSR2_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "ASR_DefaultReactivityMask" }; + DefaultReactive.SetPixel(0, 0, Color.clear); + DefaultReactive.Apply(); + + // Resource FSR2_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + // Despite what the original FSR2 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal. 
+ SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "ASR_SpdAtomicCounter", enableRandomWrite = true }; + SpdAtomicCounter.Create(); + + // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "ASR_AutoExposure", enableRandomWrite = true }; + AutoExposure.Create(); + + // Resource FSR2_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. + int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; + int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f)); + SceneLuminance = new RenderTexture(w, h, 0, GraphicsFormat.R16_SFloat, mipCount) { name = "ASR_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; + SceneLuminance.Create(); + + // Resources FSR2_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(DilatedMotionVectors, "ASR_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat); + + // Resources FSR2_LockStatus1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LockStatus, "ASR_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat); + + // Resources FSR2_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(InternalUpscaled, "ASR_InternalUpscaled", contextDescription.DisplaySize, 
GraphicsFormat.R16G16B16A16_SFloat); + + // Resources FSR2_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LumaHistory, "ASR_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm); + } + + // Set up shared aliasable resources, i.e. temporary render textures + // These do not need to persist between frames, but they do need to be available between passes + public static void CreateAliasableResources(CommandBuffer commandBuffer, Asr.ContextDescription contextDescription, Asr.DispatchDescription dispatchParams) + { + Vector2Int displaySize = contextDescription.DisplaySize; + Vector2Int maxRenderSize = contextDescription.MaxRenderSize; + + // FSR2_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); + + // FSR2_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1, true); + + // FSR2_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + + // FSR2_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); + + // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + 
commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16B16A16_SFloat, 1, true); + + // FSR2_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + } + + public static void DestroyAliasableResources(CommandBuffer commandBuffer) + { + // Release all of the aliasable resources used this frame + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavDilatedDepth); + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavLockInputLuma); + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks); + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavPreparedInputColor); + commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavNewLocks); + } + + private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format) + { + for (int i = 0; i < 2; ++i) + { + resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true }; + resource[i].Create(); + } + } + + public void Destroy() + { + DestroyResource(LumaHistory); + DestroyResource(InternalUpscaled); + DestroyResource(LockStatus); + DestroyResource(DilatedMotionVectors); + DestroyResource(ref SceneLuminance); + DestroyResource(ref AutoExposure); + DestroyResource(ref DefaultReactive); + DestroyResource(ref DefaultExposure); + DestroyResource(ref MaximumBiasLut); + DestroyResource(ref LanczosLut); + } + + private static void DestroyResource(ref Texture2D resource) + { + if (resource == null) + return; + +#if UNITY_EDITOR + if (Application.isPlaying && !UnityEditor.EditorApplication.isPaused) + UnityEngine.Object.Destroy(resource); + else + UnityEngine.Object.DestroyImmediate(resource); +#else + 
UnityEngine.Object.Destroy(resource); +#endif + resource = null; + } + + private static void DestroyResource(ref RenderTexture resource) + { + if (resource == null) + return; + + resource.Release(); + resource = null; + } + + private static void DestroyResource(RenderTexture[] resource) + { + for (int i = 0; i < resource.Length; ++i) + DestroyResource(ref resource[i]); + } + + private const int MaximumBiasTextureWidth = 16; + private const int MaximumBiasTextureHeight = 16; + private static readonly float[] MaximumBias = + { + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.976f, 1.841f, 1.774f, 1.737f, 1.716f, 1.71f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.914f, 1.784f, 1.716f, 1.673f, 1.649f, 1.641f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.793f, 1.676f, 1.604f, 1.562f, 1.54f, 1.533f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.619f, 1.536f, 1.492f, 1.467f, 1.454f, 1.449f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.575f, 1.496f, 1.456f, 1.432f, 1.416f, 1.408f, 1.405f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.555f, 1.479f, 1.438f, 1.413f, 1.398f, 1.387f, 1.381f, 1.379f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.555f, 1.474f, 1.43f, 1.404f, 1.387f, 1.376f, 1.368f, 1.363f, 1.362f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.575f, 1.479f, 1.43f, 1.401f, 1.382f, 1.369f, 1.36f, 1.354f, 1.351f, 1.35f, + 2.0f, 2.0f, 1.976f, 1.914f, 1.793f, 1.619f, 1.496f, 1.438f, 1.404f, 1.382f, 1.367f, 1.357f, 1.349f, 1.344f, 1.341f, 1.34f, + 1.876f, 1.869f, 1.841f, 1.784f, 1.676f, 1.536f, 1.456f, 1.413f, 1.387f, 1.369f, 1.357f, 1.347f, 1.341f, 1.336f, 1.333f, 1.332f, + 1.809f, 1.801f, 1.774f, 1.716f, 1.604f, 1.492f, 1.432f, 1.398f, 1.376f, 1.36f, 1.349f, 
1.341f, 1.335f, 1.33f, 1.328f, 1.327f, + 1.772f, 1.764f, 1.737f, 1.673f, 1.562f, 1.467f, 1.416f, 1.387f, 1.368f, 1.354f, 1.344f, 1.336f, 1.33f, 1.326f, 1.323f, 1.323f, + 1.753f, 1.745f, 1.716f, 1.649f, 1.54f, 1.454f, 1.408f, 1.381f, 1.363f, 1.351f, 1.341f, 1.333f, 1.328f, 1.323f, 1.321f, 1.32f, + 1.748f, 1.739f, 1.71f, 1.641f, 1.533f, 1.449f, 1.405f, 1.379f, 1.362f, 1.35f, 1.34f, 1.332f, 1.327f, 1.323f, 1.32f, 1.319f, + }; + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta new file mode 100644 index 0000000..b0f5f23 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 538f6eefa95c8ee4d9f6a9bc4bb3188e +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs new file mode 100644 index 0000000..8f829bb --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs @@ -0,0 +1,75 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, 
subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace ArmASR +{ + public static class AsrShaderIDs + { + // Shader resource views, i.e. read-only bindings + public static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered"); + public static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only"); + public static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors"); + public static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth"); + public static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure"); + public static readonly int SrvAutoExposure = Shader.PropertyToID("r_auto_exposure"); + public static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask"); + public static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask"); + public static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth"); + public static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors"); + public static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors"); + public static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilatedDepth"); + public 
static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color"); + public static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status"); + public static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma"); + public static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color"); + public static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history"); + public static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input"); + public static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut"); + public static readonly int SrvSceneLuminanceMips = Shader.PropertyToID("r_imgMips"); + public static readonly int SrvUpscaleMaximumBiasLut = Shader.PropertyToID("r_upsample_maximum_bias_lut"); + public static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks"); + + // Unordered access views, i.e. random read/write bindings + public static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth"); + public static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors"); + public static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilatedDepth"); + public static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color"); + public static readonly int UavLockStatus = Shader.PropertyToID("rw_lock_status"); + public static readonly int UavLockInputLuma = Shader.PropertyToID("rw_lock_input_luma"); + public static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks"); + public static readonly int UavPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color"); + public static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history"); + public static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output"); + public static readonly int UavExposureMipLumaChange = 
Shader.PropertyToID("rw_img_mip_shading_change"); + public static readonly int UavExposureMip5 = Shader.PropertyToID("rw_img_mip_5"); + public static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks"); + public static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure"); + public static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic"); + public static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive"); + + // Constant buffer bindings + public static readonly int CbFsr2 = Shader.PropertyToID("cbFSR2"); + public static readonly int CbSpd = Shader.PropertyToID("cbSPD"); + public static readonly int CbRcas = Shader.PropertyToID("cbRCAS"); + public static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive"); + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta new file mode 100644 index 0000000..c65dbb6 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: e0173241f8bd75e419590b43a3739e0e +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs new file mode 100644 index 0000000..fab2113 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs @@ -0,0 +1,55 @@ +// Copyright (c) 2024 Nico de Poel +// +// 
Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine.Rendering; + +namespace ArmASR +{ + /// + /// An immutable structure wrapping all the necessary information to bind a specific buffer or attachment of a render target to a compute shader. + /// + public readonly struct ResourceView + { + /// + /// This value is the equivalent of not setting any value at all; all struct fields will have their default values. + /// It does not refer to a valid texture, therefore any variable set to this value should be checked for IsValid and reassigned before being bound to a shader. + /// + public static readonly ResourceView Unassigned = new ResourceView(default); + + /// + /// This value contains a valid texture reference that can be bound to a shader, however it is just an empty placeholder texture. + /// Binding this to a shader can be seen as setting the texture variable inside the shader to null. 
+ /// + public static readonly ResourceView None = new ResourceView(BuiltinRenderTextureType.None); + + public ResourceView(in RenderTargetIdentifier renderTarget, RenderTextureSubElement subElement = RenderTextureSubElement.Default, int mipLevel = 0) + { + RenderTarget = renderTarget; + SubElement = subElement; + MipLevel = mipLevel; + } + + public bool IsValid => !RenderTarget.Equals(default); + + public readonly RenderTargetIdentifier RenderTarget; + public readonly RenderTextureSubElement SubElement; + public readonly int MipLevel; + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta new file mode 100644 index 0000000..f7e1122 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 6e2e3cd4f5c3d4146b6fe3f93685751b +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From d7191b98e7a4ef4e83e2f8255be27a2a36960cf5 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Thu, 20 Mar 2025 22:47:44 +0100 Subject: [PATCH 36/88] Applied a few more recent fixes from the FSR2 codebase --- .../Upscaling/ASR/Runtime/AsrContext.cs | 2 +- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 25 ++++++++++++++----- Packages/fidelityfx.fsr | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 245a87b..f16f150 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -187,7 +187,7 @@ namespace ArmASR // Auto exposure always used to track luma changes in locking logic commandBuffer.SetRenderTarget(_resources.AutoExposure); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1f, 0f, 0f)); // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 5a76b16..5582f6d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -19,6 +19,7 @@ // THE SOFTWARE. 
using System; +using System.Diagnostics; using UnityEngine; using UnityEngine.Profiling; using UnityEngine.Rendering; @@ -41,7 +42,7 @@ namespace ArmASR protected ComputeShader ComputeShader; protected int KernelIndex; - protected CustomSampler Sampler; + private CustomSampler _sampler; protected AsrPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) { @@ -56,9 +57,9 @@ namespace ArmASR public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + BeginSample(commandBuffer); DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); @@ -77,7 +78,7 @@ namespace ArmASR ComputeShader = shader; KernelIndex = ComputeShader.FindKernel("main"); - Sampler = CustomSampler.Create(passName); + _sampler = CustomSampler.Create(passName); bool useLut = false; #if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ @@ -95,6 +96,18 @@ namespace ArmASR if (useLut) ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"); if ((flags & Asr.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFXM_HALF"); } + + [Conditional("ENABLE_PROFILER")] + protected void BeginSample(CommandBuffer cmd) + { + cmd.BeginSample(_sampler); + } + + [Conditional("ENABLE_PROFILER")] + protected void EndSample(CommandBuffer cmd) + { + cmd.EndSample(_sampler); + } } internal class AsrComputeLuminancePyramidPass : AsrPass @@ -319,7 +332,7 @@ namespace ArmASR public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + 
BeginSample(commandBuffer); ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; ref var color = ref dispatchParams.ColorPreUpscale; @@ -333,7 +346,7 @@ namespace ArmASR commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } } } diff --git a/Packages/fidelityfx.fsr b/Packages/fidelityfx.fsr index d65cc3a..e284006 160000 --- a/Packages/fidelityfx.fsr +++ b/Packages/fidelityfx.fsr @@ -1 +1 @@ -Subproject commit d65cc3a35de270a1011c8158810e3d56c579953f +Subproject commit e28400609a43f850dae2b83048e936faa54017e8 From dd33e77b6f1a35e6bd9791b4cfd359ddcf22aba0 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Fri, 21 Mar 2025 22:39:34 +0100 Subject: [PATCH 37/88] Reworked ASR passes to allow for blitting using fragment shaders, and got an idea of the problems yet to solve. Modified assets script and added it to the PPV2 resources. Some additional refactoring and bug fixing as well. --- .../PostProcessing/PostProcessResources.asset | 4 + .../Effects/Upscaling/ASR/Runtime/Asr.cs | 44 +++- .../Upscaling/ASR/Runtime/AsrAssets.cs | 66 ++---- .../Upscaling/ASR/Runtime/AsrContext.cs | 4 +- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 213 ++++++++++-------- .../Upscaling/ASR/Runtime/AsrResources.cs | 12 +- .../Runtime/PostProcessResources.cs | 6 + 7 files changed, 195 insertions(+), 154 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset index 954f3a9..36cc0e2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset @@ -159,3 +159,7 @@ MonoBehaviour: convert: {fileID: 7200000, guid: a41757aacd8b70e42a4001d514bfbe53, type: 3} activate: {fileID: 7200000, guid: d7de362950af6fe4e90da7d6e32f9826, type: 3} upscale: {fileID: 
7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3} + asrUpscalerShaders: + fragmentShader: {fileID: 4800000, guid: 147cc2cffac69ef4eb3ea8addafc9d10, type: 3} + computeLuminancePyramidPass: {fileID: 7200000, guid: 57220d870cb441c8a6df8a9e15a74283, type: 3} + lockPass: {fileID: 7200000, guid: a6e1d5d5372d467790fcf2d089b50ef7, type: 3} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index a75fab0..590f5f2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -21,6 +21,7 @@ using System; using System.Runtime.InteropServices; using UnityEngine; +using UnityEngine.Rendering; namespace ArmASR { @@ -133,6 +134,45 @@ namespace ArmASR } #endif + /// + /// Alternative for CommandBuffer.SetComputeTextureParam that guards against attempts to bind mip levels that don't exist. 
+ /// + internal static void SetComputeTextureMipParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int kernelIndex, int nameID, Texture texture, int mipLevel) + { + mipLevel = Math.Min(mipLevel, texture.mipmapCount - 1); + commandBuffer.SetComputeTextureParam(computeShader, kernelIndex, nameID, texture, mipLevel); + } + + internal static void SetComputeResourceParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int kernelIndex, int nameID, in ResourceView resource) + { + commandBuffer.SetComputeTextureParam(computeShader, kernelIndex, nameID, resource.RenderTarget, resource.MipLevel, resource.SubElement); + } + + internal static void SetComputeConstantBufferParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int nameID, ComputeBuffer buffer) + { + commandBuffer.SetComputeConstantBufferParam(computeShader, nameID, buffer, 0, buffer.stride); + } + + internal static void SetGlobalResource(this CommandBuffer commandBuffer, int nameID, in ResourceView resource) + { + commandBuffer.SetGlobalTexture(nameID, resource.RenderTarget, resource.SubElement); + } + + internal static void DestroyObject(UnityEngine.Object obj) + { + if (obj == null) + return; + +#if UNITY_EDITOR + if (Application.isPlaying && !UnityEditor.EditorApplication.isPaused) + UnityEngine.Object.Destroy(obj); + else + UnityEngine.Object.DestroyImmediate(obj); +#else + UnityEngine.Object.Destroy(obj); +#endif + } + public enum QualityMode { NativeAA = 0, @@ -171,7 +211,7 @@ namespace ArmASR /// /// A structure encapsulating the parameters for dispatching the various passes of FidelityFX Super Resolution 2. /// - public class DispatchDescription + public class DispatchDescription // TODO: make into struct { public ResourceView Color; public ResourceView Depth; @@ -199,7 +239,7 @@ namespace ArmASR /// /// A structure encapsulating the parameters for automatic generation of a reactive mask. 
/// - public class GenerateReactiveDescription + public class GenerateReactiveDescription // TODO: make into struct (with static readonly Default property) { public ResourceView ColorOpaqueOnly; public ResourceView ColorPreUpscale; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs index 95ff401..b44859c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -36,17 +36,22 @@ namespace ArmASR { shaders = new AsrShaders { + fragmentShader = FindFragmentShader("ffxm_fsr2_fs"), computeLuminancePyramidPass = FindComputeShader("ffxm_fsr2_compute_luminance_pyramid_pass"), - reconstructPreviousDepthPass = FindComputeShader("ffx_fsr2_reconstruct_previous_depth_pass"), - depthClipPass = FindComputeShader("ffx_fsr2_depth_clip_pass"), lockPass = FindComputeShader("ffxm_fsr2_lock_pass"), - accumulatePass = FindComputeShader("ffx_fsr2_accumulate_pass"), - sharpenPass = FindComputeShader("ffx_fsr2_rcas_pass"), - autoGenReactivePass = FindComputeShader("ffx_fsr2_autogen_reactive_pass"), - tcrAutoGenPass = FindComputeShader("ffx_fsr2_tcr_autogen_pass"), }; } + private static Shader FindFragmentShader(string name) + { + string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:Shader {name}"); + if (assetGuids == null || assetGuids.Length == 0) + return null; + + string assetPath = UnityEditor.AssetDatabase.GUIDToAssetPath(assetGuids[0]); + return UnityEditor.AssetDatabase.LoadAssetAtPath(assetPath); + } + private static ComputeShader FindComputeShader(string name) { string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:ComputeShader {name}"); @@ -66,44 +71,19 @@ namespace ArmASR public class AsrShaders { /// - /// The 
compute shader used by the luminance pyramid computation pass. + /// Combined shader file containing all non-compute passes. /// - public ComputeShader computeLuminancePyramidPass; - - /// - /// The compute shader used by the previous depth reconstruction pass. - /// - public ComputeShader reconstructPreviousDepthPass; - + public Shader fragmentShader; + /// - /// The compute shader used by the depth clip pass. + /// The compute shader used by the luminance pyramid computation pass. /// - public ComputeShader depthClipPass; + public ComputeShader computeLuminancePyramidPass; /// /// The compute shader used by the lock pass. /// public ComputeShader lockPass; - - /// - /// The compute shader used by the accumulation pass. - /// - public ComputeShader accumulatePass; - - /// - /// The compute shader used by the RCAS sharpening pass. - /// - public ComputeShader sharpenPass; - - /// - /// The compute shader used to auto-generate a reactive mask. - /// - public ComputeShader autoGenReactivePass; - - /// - /// The compute shader used to auto-generate a transparency & composition mask. - /// - public ComputeShader tcrAutoGenPass; /// /// Returns a copy of this class and its contents. 
@@ -122,14 +102,9 @@ namespace ArmASR { return new AsrShaders { + fragmentShader = Object.Instantiate(fragmentShader), computeLuminancePyramidPass = Object.Instantiate(computeLuminancePyramidPass), - reconstructPreviousDepthPass = Object.Instantiate(reconstructPreviousDepthPass), - depthClipPass = Object.Instantiate(depthClipPass), lockPass = Object.Instantiate(lockPass), - accumulatePass = Object.Instantiate(accumulatePass), - sharpenPass = Object.Instantiate(sharpenPass), - autoGenReactivePass = Object.Instantiate(autoGenReactivePass), - tcrAutoGenPass = Object.Instantiate(tcrAutoGenPass), }; } @@ -139,14 +114,9 @@ namespace ArmASR /// public void Dispose() { + Object.Destroy(fragmentShader); Object.Destroy(computeLuminancePyramidPass); - Object.Destroy(reconstructPreviousDepthPass); - Object.Destroy(depthClipPass); Object.Destroy(lockPass); - Object.Destroy(accumulatePass); - Object.Destroy(sharpenPass); - Object.Destroy(autoGenReactivePass); - Object.Destroy(tcrAutoGenPass); } } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index f16f150..4175bbe 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -229,8 +229,8 @@ namespace ArmASR // Dispatch RCAS const int threadGroupWorkRegionDimRcas = 16; - int threadGroupsX = (Screen.width + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; - int threadGroupsY = (Screen.height + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + int threadGroupsX = (_contextDescription.DisplaySize.x + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + int threadGroupsY = (_contextDescription.DisplaySize.y + 
threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 5582f6d..a166814 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -19,6 +19,7 @@ // THE SOFTWARE. using System; +using System.Collections.Generic; using System.Diagnostics; using UnityEngine; using UnityEngine.Profiling; @@ -41,6 +42,11 @@ namespace ArmASR protected ComputeShader ComputeShader; protected int KernelIndex; + + protected Material FragmentMaterial; + protected int FragmentPass; + protected MaterialPropertyBlock FragmentProperties; + protected readonly List FragmentKeywords = new(); private CustomSampler _sampler; @@ -53,6 +59,13 @@ namespace ArmASR public virtual void Dispose() { + if (FragmentMaterial != null) + { + Asr.DestroyObject(FragmentMaterial); + FragmentMaterial = null; + } + + FragmentKeywords.Clear(); } public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) @@ -80,6 +93,45 @@ namespace ArmASR KernelIndex = ComputeShader.FindKernel("main"); _sampler = CustomSampler.Create(passName); + foreach (string keyword in GetKeywords(flags)) + { + ComputeShader.EnableKeyword(keyword); + } + } + + protected void InitFragmentShader(string passName, Shader shader, int passNumber) + { + InitFragmentShader(passName, shader, passNumber, ContextDescription.Flags); + } + + private void InitFragmentShader(string passName, Shader shader, int passNumber, Asr.InitializationFlags flags) + { + if (shader == 
null) + { + throw new MissingReferenceException($"Shader for ASR pass '{passName}' could not be loaded! Please ensure it is included in the project correctly."); + } + + FragmentMaterial = new Material(shader); + FragmentPass = passNumber; + _sampler = CustomSampler.Create(passName); + + FragmentProperties = new MaterialPropertyBlock(); + foreach (string keyword in GetKeywords(flags)) + { + // TODO: also want to include keywords that should be disabled (false) + // TODO: might be better to just determine all the keywords once up front and set them globally, probably don't even need to manage them here but in the AsrContext instead + // NOTE: be mindful of UNITY_2021_2_OR_NEWER (Local & GlobalKeyword were introduced there) + FragmentKeywords.Add(new LocalKeyword(shader, keyword)); + } + } + + protected void BlitFragment(CommandBuffer commandBuffer) + { + commandBuffer.DrawProcedural(Matrix4x4.identity, FragmentMaterial, FragmentPass, MeshTopology.Triangles, 3, 1, FragmentProperties); + } + + private static IEnumerable GetKeywords(Asr.InitializationFlags flags) + { bool useLut = false; #if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ if (SystemInfo.computeSubGroupSize == 64) @@ -89,12 +141,12 @@ namespace ArmASR #endif // This matches the permutation rules from the CreatePipeline* functions - if ((flags & Asr.InitializationFlags.EnableHighDynamicRange) != 0) ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_HDR_COLOR_INPUT"); - if ((flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS"); - if ((flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS"); - if ((flags & Asr.InitializationFlags.EnableDepthInverted) != 0) ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_INVERTED_DEPTH"); - if (useLut) 
ComputeShader.EnableKeyword("FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"); - if ((flags & Asr.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFXM_HALF"); + if ((flags & Asr.InitializationFlags.EnableHighDynamicRange) != 0) yield return "FFXM_FSR2_OPTION_HDR_COLOR_INPUT"; + if ((flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) yield return "FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS"; + if ((flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) yield return "FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS"; + if ((flags & Asr.InitializationFlags.EnableDepthInverted) != 0) yield return "FFXM_FSR2_OPTION_INVERTED_DEPTH"; + if (useLut) yield return "FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"; + if ((flags & Asr.InitializationFlags.EnableFP16Usage) != 0) yield return "FFXM_HALF"; } [Conditional("ENABLE_PROFILER")] @@ -124,8 +176,7 @@ namespace ArmASR protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - ref var color = ref dispatchParams.Color; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.Color); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel); @@ -144,26 +195,21 @@ namespace ArmASR public AsrReconstructPreviousDepthPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { - InitComputeShader("Reconstruct & Dilate", 
contextDescription.Shaders.reconstructPreviousDepthPass); + InitFragmentShader("Reconstruct & Dilate", contextDescription.Shaders.fragmentShader, 1); } protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - ref var color = ref dispatchParams.Color; - ref var depth = ref dispatchParams.Depth; - ref var motionVectors = ref dispatchParams.MotionVectors; - ref var exposure = ref dispatchParams.Exposure; + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + // TODO: this actually needs to be the render target + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - - 
commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); + BlitFragment(commandBuffer); } } @@ -172,33 +218,25 @@ namespace ArmASR public AsrDepthClipPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { - InitComputeShader("Depth Clip", contextDescription.Shaders.depthClipPass); + InitFragmentShader("Depth Clip", contextDescription.Shaders.fragmentShader, 2); } protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - ref var color = ref dispatchParams.Color; - ref var depth = ref dispatchParams.Depth; - ref var motionVectors = ref dispatchParams.MotionVectors; - ref var exposure = ref dispatchParams.Exposure; - ref var reactive = ref dispatchParams.Reactive; - ref var tac = ref dispatchParams.TransparencyAndComposition; - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, 
tac.MipLevel, tac.SubElement); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); - - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvReactiveMask, dispatchParams.Reactive); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvTransparencyAndCompositionMask, dispatchParams.TransparencyAndComposition); + + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); + + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); + 
BlitFragment(commandBuffer); } } @@ -230,9 +268,9 @@ namespace ArmASR public AsrAccumulatePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { - InitComputeShader("Reproject & Accumulate", contextDescription.Shaders.accumulatePass); + InitFragmentShader("Reproject & Accumulate", contextDescription.Shaders.fragmentShader, 3); #if UNITY_2021_2_OR_NEWER - _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); + _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); // TODO: dynamically enable this on MaterialPropertyBlock #endif } @@ -252,37 +290,35 @@ namespace ArmASR if ((ContextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) { - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); } else { - ref var motionVectors = ref dispatchParams.MotionVectors; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); } - ref var exposure = ref dispatchParams.Exposure; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvDilatedReactiveMasks, AsrShaderIDs.UavDilatedReactiveMasks); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, 
AsrShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvPreparedInputColor, AsrShaderIDs.UavPreparedInputColor); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLanczosLut, Resources.LanczosLut); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); - - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); + + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedReactiveMasks, AsrShaderIDs.UavDilatedReactiveMasks); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPreparedInputColor, AsrShaderIDs.UavPreparedInputColor); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLanczosLut, Resources.LanczosLut); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); + 
commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); + + // TODO: these need to be multi-render targets (and also vary depending on the ASR preset used) + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); - ref var output = ref dispatchParams.Output; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + // TODO: this actually needs to be the render target + commandBuffer.SetGlobalResource(AsrShaderIDs.UavUpscaledOutput, dispatchParams.Output); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); + BlitFragment(commandBuffer); } } @@ -295,22 +331,20 @@ namespace ArmASR { _rcasConstants = rcasConstants; - InitComputeShader("RCAS Sharpening", contextDescription.Shaders.sharpenPass); + InitFragmentShader("RCAS Sharpening", contextDescription.Shaders.fragmentShader, 4); } protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - ref var exposure = ref dispatchParams.Exposure; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); - 
commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); - ref var output = ref dispatchParams.Output; - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + // TODO: this actually needs to be the render target + commandBuffer.SetGlobalResource(AsrShaderIDs.UavUpscaledOutput, dispatchParams.Output); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbRcas, _rcasConstants, 0, _rcasConstants.stride); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbRcas, _rcasConstants, 0, _rcasConstants.stride); + BlitFragment(commandBuffer); } } @@ -323,7 +357,7 @@ namespace ArmASR { _generateReactiveConstants = generateReactiveConstants; - InitComputeShader("Auto-Generate Reactive Mask", contextDescription.Shaders.autoGenReactivePass); + InitFragmentShader("Auto-Generate Reactive Mask", contextDescription.Shaders.fragmentShader, 0); } protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) @@ -334,17 +368,14 @@ namespace ArmASR { BeginSample(commandBuffer); - ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; - ref var color = ref dispatchParams.ColorPreUpscale; - ref var reactive = ref dispatchParams.OutReactive; + commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, 
AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); + commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoReactive, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + // TODO: this actually needs to be the render target + commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoReactive, dispatchParams.OutReactive); - commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); - - commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); + BlitFragment(commandBuffer); EndSample(commandBuffer); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 44dd65a..a879238 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -173,17 +173,7 @@ namespace ArmASR private static void DestroyResource(ref Texture2D resource) { - if (resource == null) - return; - -#if UNITY_EDITOR - if (Application.isPlaying && 
!UnityEditor.EditorApplication.isPaused) - UnityEngine.Object.Destroy(resource); - else - UnityEngine.Object.DestroyImmediate(resource); -#else - UnityEngine.Object.Destroy(resource); -#endif + Asr.DestroyObject(resource); resource = null; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs index 8d62f44..867095e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs @@ -1,4 +1,5 @@ using System; +using ArmASR; using FidelityFX.FSR2; using FidelityFX.FSR3; using UnityEngine.Serialization; @@ -292,6 +293,11 @@ namespace UnityEngine.Rendering.PostProcessing /// All the compute shaders used by post-processing. /// public ComputeShaders computeShaders; + + /// + /// Shaders used by the Arm Accuracy Super Resolution (ASR) Upscaler. + /// + public AsrShaders asrUpscalerShaders; #if UNITY_EDITOR /// From d5bbadaa775306c8918c3cca607cfed18bfe170c Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 12:29:04 +0100 Subject: [PATCH 38/88] Made all the ASR shader keywords global and encapsulated all of the keyword management into a separate class, which will enable and disable keywords globally based on the initialization flags and dispatch parameters. 
--- .../Upscaling/ASR/Runtime/AsrContext.cs | 4 + .../Upscaling/ASR/Runtime/AsrKeywords.cs | 124 ++++++++++++++++++ .../Upscaling/ASR/Runtime/AsrKeywords.cs.meta | 3 + .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 77 ++--------- ...sr2_compute_luminance_pyramid_pass.compute | 8 +- .../ASR/Shaders/ffxm_fsr2_lock_pass.compute | 8 +- 6 files changed, 148 insertions(+), 76 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 4175bbe..a1587af 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -65,6 +65,8 @@ namespace ArmASR private readonly Asr.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Asr.GenerateReactiveConstants() }; private ref Asr.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0]; + private AsrKeywords _keywords = new(); + private bool _firstExecution; private Vector2 _previousJitterOffset; private int _resourceFrameIndex; @@ -142,6 +144,8 @@ namespace ArmASR if (dispatchParams.UseTextureArrays) commandBuffer.EnableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); + _keywords.ApplyKeywords(commandBuffer, _contextDescription.Flags, dispatchParams); + if (_firstExecution) { commandBuffer.SetRenderTarget(_resources.LockStatus[0]); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs new file mode 100644 index 0000000..673e57f --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs @@ -0,0 +1,124 @@ +using UnityEngine; +using UnityEngine.Rendering; + +namespace ArmASR +{ + public class AsrKeywords + { + private static readonly string OptionHalfPrecision = "FFXM_HALF"; + private static readonly string OptionHdrColorInput = "FFXM_FSR2_OPTION_HDR_COLOR_INPUT"; + private static readonly string OptionLowResolutionMotionVectors = "FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS"; + private static readonly string OptionJitteredMotionVectors = "FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS"; + private static readonly string OptionInvertedDepth = "FFXM_FSR2_OPTION_INVERTED_DEPTH"; + private static readonly string OptionReprojectUseLut = "FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"; + private static readonly string OptionApplySharpening = "FFXM_FSR2_OPTION_APPLY_SHARPENING"; + +#if UNITY_2021_2_OR_NEWER + private readonly GlobalKeyword _halfPrecisionKeyword; + private readonly GlobalKeyword _hdrColorInputKeyword; + private readonly GlobalKeyword _lowResMotionVectorsKeyword; + private readonly GlobalKeyword _jitteredMotionVectorsKeyword; + private readonly GlobalKeyword _invertedDepthKeyword; + private readonly GlobalKeyword _reprojectUseLutKeyword; + private readonly GlobalKeyword _applySharpeningKeyword; +#endif + + public AsrKeywords() + { +#if UNITY_2021_2_OR_NEWER + _halfPrecisionKeyword = new GlobalKeyword(OptionHalfPrecision); + _hdrColorInputKeyword = new GlobalKeyword(OptionHdrColorInput); + _lowResMotionVectorsKeyword = new GlobalKeyword(OptionLowResolutionMotionVectors); + _jitteredMotionVectorsKeyword = new GlobalKeyword(OptionJitteredMotionVectors); + _invertedDepthKeyword = new GlobalKeyword(OptionInvertedDepth); + _reprojectUseLutKeyword = new 
GlobalKeyword(OptionReprojectUseLut); + _applySharpeningKeyword = new GlobalKeyword(OptionApplySharpening); +#endif + } + + public void ApplyKeywords(CommandBuffer commandBuffer, Asr.InitializationFlags initFlags, in Asr.DispatchDescription dispatchParams) + { + bool useLut = false; +#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ + if (SystemInfo.computeSubGroupSize == 64) + { + useLut = true; + } +#endif + + // This matches the permutation rules from the CreatePipeline* functions +#if UNITY_2021_2_OR_NEWER + if ((initFlags & Asr.InitializationFlags.EnableFP16Usage) != 0) + commandBuffer.EnableKeyword(_halfPrecisionKeyword); + else + commandBuffer.DisableKeyword(_halfPrecisionKeyword); + + if ((initFlags & Asr.InitializationFlags.EnableHighDynamicRange) != 0) + commandBuffer.EnableKeyword(_hdrColorInputKeyword); + else + commandBuffer.DisableKeyword(_hdrColorInputKeyword); + + if ((initFlags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) + commandBuffer.EnableKeyword(_lowResMotionVectorsKeyword); + else + commandBuffer.DisableKeyword(_lowResMotionVectorsKeyword); + + if ((initFlags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) + commandBuffer.EnableKeyword(_jitteredMotionVectorsKeyword); + else + commandBuffer.DisableKeyword(_jitteredMotionVectorsKeyword); + + if ((initFlags & Asr.InitializationFlags.EnableDepthInverted) != 0) + commandBuffer.EnableKeyword(_invertedDepthKeyword); + else + commandBuffer.DisableKeyword(_invertedDepthKeyword); + + if (useLut) + commandBuffer.EnableKeyword(_reprojectUseLutKeyword); + else + commandBuffer.DisableKeyword(_reprojectUseLutKeyword); + + if (dispatchParams.EnableSharpening) + commandBuffer.EnableKeyword(_applySharpeningKeyword); + else + commandBuffer.DisableKeyword(_applySharpeningKeyword); +#else + if ((initFlags & Asr.InitializationFlags.EnableFP16Usage) != 0) + commandBuffer.EnableShaderKeyword(OptionHalfPrecision); + else + 
commandBuffer.DisableShaderKeyword(OptionHalfPrecision); + + if ((initFlags & Asr.InitializationFlags.EnableHighDynamicRange) != 0) + commandBuffer.EnableShaderKeyword(OptionHdrColorInput); + else + commandBuffer.DisableShaderKeyword(OptionHdrColorInput); + + if ((initFlags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) + commandBuffer.EnableShaderKeyword(OptionLowResolutionMotionVectors); + else + commandBuffer.DisableShaderKeyword(OptionLowResolutionMotionVectors); + + if ((initFlags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) + commandBuffer.EnableShaderKeyword(OptionJitteredMotionVectors); + else + commandBuffer.DisableShaderKeyword(OptionJitteredMotionVectors); + + if ((initFlags & Asr.InitializationFlags.EnableDepthInverted) != 0) + commandBuffer.EnableShaderKeyword(OptionInvertedDepth); + else + commandBuffer.DisableShaderKeyword(OptionInvertedDepth); + + if (useLut) + commandBuffer.EnableShaderKeyword(OptionReprojectUseLut); + else + commandBuffer.DisableShaderKeyword(OptionReprojectUseLut); + + if (dispatchParams.EnableSharpening) + commandBuffer.EnableShaderKeyword(OptionApplySharpening); + else + commandBuffer.DisableShaderKeyword(OptionApplySharpening); +#endif + + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta new file mode 100644 index 0000000..5b51256 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: a2636bdd1878444fb3d3475610d379df +timeCreated: 1742641520 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index a166814..27f62b7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -46,7 +46,6 @@ namespace ArmASR protected Material FragmentMaterial; protected int FragmentPass; protected MaterialPropertyBlock FragmentProperties; - protected readonly List FragmentKeywords = new(); private CustomSampler _sampler; @@ -64,8 +63,6 @@ namespace ArmASR Asr.DestroyObject(FragmentMaterial); FragmentMaterial = null; } - - FragmentKeywords.Clear(); } public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) @@ -78,11 +75,6 @@ namespace ArmASR protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); protected void InitComputeShader(string passName, ComputeShader shader) - { - InitComputeShader(passName, shader, ContextDescription.Flags); - } - - private void InitComputeShader(string passName, ComputeShader shader, Asr.InitializationFlags flags) { if (shader == null) { @@ -92,19 +84,9 @@ namespace ArmASR ComputeShader = shader; KernelIndex = ComputeShader.FindKernel("main"); _sampler = CustomSampler.Create(passName); - - foreach (string keyword in GetKeywords(flags)) - { - ComputeShader.EnableKeyword(keyword); - } } protected void InitFragmentShader(string passName, Shader shader, int passNumber) - { - InitFragmentShader(passName, shader, passNumber, ContextDescription.Flags); - } - - private void InitFragmentShader(string passName, Shader shader, int passNumber, Asr.InitializationFlags flags) { if (shader == null) { @@ -113,42 +95,22 @@ namespace ArmASR FragmentMaterial = new Material(shader); FragmentPass = 
passNumber; - _sampler = CustomSampler.Create(passName); - FragmentProperties = new MaterialPropertyBlock(); - foreach (string keyword in GetKeywords(flags)) - { - // TODO: also want to include keywords that should be disabled (false) - // TODO: might be better to just determine all the keywords once up front and set them globally, probably don't even need to manage them here but in the AsrContext instead - // NOTE: be mindful of UNITY_2021_2_OR_NEWER (Local & GlobalKeyword were introduced there) - FragmentKeywords.Add(new LocalKeyword(shader, keyword)); - } + _sampler = CustomSampler.Create(passName); } - protected void BlitFragment(CommandBuffer commandBuffer) + protected void BlitFragment(CommandBuffer commandBuffer, params RenderTargetIdentifier[] renderTargets) { + if (renderTargets == null || renderTargets.Length == 0) + commandBuffer.SetRenderTarget(BuiltinRenderTextureType.None); + else if (renderTargets.Length == 1) + commandBuffer.SetRenderTarget(renderTargets[0]); + else + commandBuffer.SetRenderTarget(renderTargets, BuiltinRenderTextureType.None); + commandBuffer.DrawProcedural(Matrix4x4.identity, FragmentMaterial, FragmentPass, MeshTopology.Triangles, 3, 1, FragmentProperties); } - private static IEnumerable GetKeywords(Asr.InitializationFlags flags) - { - bool useLut = false; -#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ - if (SystemInfo.computeSubGroupSize == 64) - { - useLut = true; - } -#endif - - // This matches the permutation rules from the CreatePipeline* functions - if ((flags & Asr.InitializationFlags.EnableHighDynamicRange) != 0) yield return "FFXM_FSR2_OPTION_HDR_COLOR_INPUT"; - if ((flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) yield return "FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS"; - if ((flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) yield return "FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS"; - if ((flags & 
Asr.InitializationFlags.EnableDepthInverted) != 0) yield return "FFXM_FSR2_OPTION_INVERTED_DEPTH"; - if (useLut) yield return "FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"; - if ((flags & Asr.InitializationFlags.EnableFP16Usage) != 0) yield return "FFXM_HALF"; - } - [Conditional("ENABLE_PROFILER")] protected void BeginSample(CommandBuffer cmd) { @@ -259,35 +221,14 @@ namespace ArmASR internal class AsrAccumulatePass : AsrPass { - private const string SharpeningKeyword = "FFXM_FSR2_OPTION_APPLY_SHARPENING"; - -#if UNITY_2021_2_OR_NEWER - private readonly LocalKeyword _sharpeningKeyword; -#endif - public AsrAccumulatePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { InitFragmentShader("Reproject & Accumulate", contextDescription.Shaders.fragmentShader, 3); -#if UNITY_2021_2_OR_NEWER - _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); // TODO: dynamically enable this on MaterialPropertyBlock -#endif } protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { -#if UNITY_2021_2_OR_NEWER - if (dispatchParams.EnableSharpening) - commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword); - else - commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword); -#else - if (dispatchParams.EnableSharpening) - commandBuffer.EnableShaderKeyword(SharpeningKeyword); - else - commandBuffer.DisableShaderKeyword(SharpeningKeyword); -#endif - if ((ContextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) { commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute index cab6cf5..5d4044b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute @@ -1,9 +1,9 @@ #pragma kernel main -#pragma multi_compile_local __ FFXM_HALF -#pragma multi_compile_local __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFXM_FSR2_OPTION_INVERTED_DEPTH +#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute index 9e3a2a4..660b922 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute @@ -1,9 +1,9 @@ #pragma kernel main -#pragma multi_compile_local __ FFXM_HALF -#pragma multi_compile_local __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFXM_FSR2_OPTION_INVERTED_DEPTH +#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 
+#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY From a0eeda1e8456211ce4389a8a4edee921755b9fa0 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 12:43:53 +0100 Subject: [PATCH 39/88] Updated fragment shader passes to provide their output render targets to the Blit function, including multi-render targets --- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 67 ++++++++++--------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 27f62b7..8fce782 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -21,6 +21,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Runtime.CompilerServices; using UnityEngine; using UnityEngine.Profiling; using UnityEngine.Rendering; @@ -99,25 +100,33 @@ namespace ArmASR _sampler = CustomSampler.Create(passName); } - protected void BlitFragment(CommandBuffer commandBuffer, params RenderTargetIdentifier[] renderTargets) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + protected void BlitFragment(CommandBuffer commandBuffer, RenderTargetIdentifier renderTarget) + { + commandBuffer.SetRenderTarget(renderTarget); + BlitFragment(commandBuffer); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + protected void BlitFragment(CommandBuffer commandBuffer, RenderTargetIdentifier[] renderTargets) + { + commandBuffer.SetRenderTarget(renderTargets, BuiltinRenderTextureType.None); + BlitFragment(commandBuffer); + } + + 
[MethodImpl(MethodImplOptions.AggressiveInlining)] + private void BlitFragment(CommandBuffer commandBuffer) { - if (renderTargets == null || renderTargets.Length == 0) - commandBuffer.SetRenderTarget(BuiltinRenderTextureType.None); - else if (renderTargets.Length == 1) - commandBuffer.SetRenderTarget(renderTargets[0]); - else - commandBuffer.SetRenderTarget(renderTargets, BuiltinRenderTextureType.None); - commandBuffer.DrawProcedural(Matrix4x4.identity, FragmentMaterial, FragmentPass, MeshTopology.Triangles, 3, 1, FragmentProperties); } - [Conditional("ENABLE_PROFILER")] + [Conditional("ENABLE_PROFILER"), MethodImpl(MethodImplOptions.AggressiveInlining)] protected void BeginSample(CommandBuffer cmd) { cmd.BeginSample(_sampler); } - [Conditional("ENABLE_PROFILER")] + [Conditional("ENABLE_PROFILER"), MethodImpl(MethodImplOptions.AggressiveInlining)] protected void EndSample(CommandBuffer cmd) { cmd.EndSample(_sampler); @@ -167,16 +176,15 @@ namespace ArmASR commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); - // TODO: this actually needs to be the render target - commandBuffer.SetGlobalTexture(AsrShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); - FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - BlitFragment(commandBuffer); + BlitFragment(commandBuffer, Resources.DilatedMotionVectors[frameIndex]); } } internal class AsrDepthClipPass : AsrPass { + private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2]; + public AsrDepthClipPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { @@ -197,8 +205,11 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); 
commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); + _mrt[0] = AsrShaderIDs.UavDilatedReactiveMasks; + _mrt[1] = BuiltinRenderTextureType.None; // TODO: Tonemapped + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - BlitFragment(commandBuffer); + BlitFragment(commandBuffer, _mrt); } } @@ -221,6 +232,8 @@ namespace ArmASR internal class AsrAccumulatePass : AsrPass { + private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[4]; + public AsrAccumulatePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { @@ -250,16 +263,14 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); - // TODO: these need to be multi-render targets (and also vary depending on the ASR preset used) - commandBuffer.SetGlobalTexture(AsrShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); - commandBuffer.SetGlobalTexture(AsrShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]); - commandBuffer.SetGlobalTexture(AsrShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); + // TODO: these need to vary depending on the ASR preset used (below is for Quality preset) + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // TODO: ColorAndWeight + _mrt[1] = Resources.LockStatus[frameIndex]; + _mrt[2] = Resources.LumaHistory[frameIndex]; + _mrt[3] = dispatchParams.EnableSharpening ? 
BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; - // TODO: this actually needs to be the render target - commandBuffer.SetGlobalResource(AsrShaderIDs.UavUpscaledOutput, dispatchParams.Output); - FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - BlitFragment(commandBuffer); + BlitFragment(commandBuffer, _mrt); } } @@ -280,12 +291,9 @@ namespace ArmASR commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); - // TODO: this actually needs to be the render target - commandBuffer.SetGlobalResource(AsrShaderIDs.UavUpscaledOutput, dispatchParams.Output); - FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbRcas, _rcasConstants, 0, _rcasConstants.stride); - BlitFragment(commandBuffer); + BlitFragment(commandBuffer, dispatchParams.Output.RenderTarget); } } @@ -312,11 +320,8 @@ namespace ArmASR commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); - // TODO: this actually needs to be the render target - commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoReactive, dispatchParams.OutReactive); - FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); - BlitFragment(commandBuffer); + BlitFragment(commandBuffer, dispatchParams.OutReactive.RenderTarget); EndSample(commandBuffer); } From d320a76b89cd4ca151a37c0b12d3fb3bf6180d33 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 12:55:29 +0100 Subject: [PATCH 40/88] Turned dispatch descriptions into structs, using in-parameters 
where applicable --- .../Effects/Upscaling/ASR/Runtime/Asr.cs | 22 ++++++++++----- .../Upscaling/ASR/Runtime/AsrContext.cs | 28 ++++++++----------- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 2 +- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index 590f5f2..158c196 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -44,7 +44,7 @@ namespace ArmASR flags |= InitializationFlags.EnableDebugChecking; #endif - Debug.Log($"Setting up Arm ASR with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); + Debug.Log($"Setting up ASR with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); var contextDescription = new ContextDescription { @@ -211,7 +211,7 @@ namespace ArmASR /// /// A structure encapsulating the parameters for dispatching the various passes of FidelityFX Super Resolution 2. /// - public class DispatchDescription // TODO: make into struct + public struct DispatchDescription { public ResourceView Color; public ResourceView Depth; @@ -239,16 +239,24 @@ namespace ArmASR /// /// A structure encapsulating the parameters for automatic generation of a reactive mask. 
/// - public class GenerateReactiveDescription // TODO: make into struct (with static readonly Default property) + public struct GenerateReactiveDescription { public ResourceView ColorOpaqueOnly; public ResourceView ColorPreUpscale; public ResourceView OutReactive; public Vector2Int RenderSize; - public float Scale = 0.5f; - public float CutoffThreshold = 0.2f; - public float BinaryValue = 0.9f; - public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax; + public float Scale; + public float CutoffThreshold; + public float BinaryValue; + public GenerateReactiveFlags Flags; + + public static readonly GenerateReactiveDescription Default = new GenerateReactiveDescription + { + Scale = 0.5f, + CutoffThreshold = 0.2f, + BinaryValue = 0.9f, + Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax, + }; } [Flags] diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index a1587af..4857108 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -71,10 +71,10 @@ namespace ArmASR private Vector2 _previousJitterOffset; private int _resourceFrameIndex; - public void Create(Asr.ContextDescription contextDescription) + public void Create(in Asr.ContextDescription contextDescription) { _contextDescription = contextDescription; - _commandBuffer = new CommandBuffer { name = "Arm ASR" }; + _commandBuffer = new CommandBuffer { name = "ASR" }; _upscalerConstantsBuffer = CreateConstantBuffer(); _spdConstantsBuffer = CreateConstantBuffer(); @@ -127,7 +127,7 @@ namespace ArmASR } 
} - public void Dispatch(Asr.DispatchDescription dispatchParams) + public void Dispatch(in Asr.DispatchDescription dispatchParams) { _commandBuffer.Clear(); Dispatch(dispatchParams, _commandBuffer); @@ -198,7 +198,7 @@ namespace ArmASR commandBuffer.ClearRenderTarget(false, true, Color.clear); } - // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option + // Need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option bool depthInverted = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) == Asr.InitializationFlags.EnableDepthInverted; commandBuffer.SetRenderTarget(AsrShaderIDs.UavReconstructedPrevNearestDepth); commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white); @@ -245,29 +245,25 @@ namespace ArmASR commandBuffer.DisableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); } - public void GenerateReactiveMask(Asr.GenerateReactiveDescription dispatchParams) + public void GenerateReactiveMask(in Asr.GenerateReactiveDescription dispatchParams) { _commandBuffer.Clear(); GenerateReactiveMask(dispatchParams, _commandBuffer); Graphics.ExecuteCommandBuffer(_commandBuffer); } - public void GenerateReactiveMask(Asr.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) + public void GenerateReactiveMask(in Asr.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) { - const int threadGroupWorkRegionDim = 8; - int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - GenReactiveConsts.scale = dispatchParams.Scale; GenReactiveConsts.threshold = dispatchParams.CutoffThreshold; GenReactiveConsts.binaryValue = dispatchParams.BinaryValue; GenReactiveConsts.flags = 
(uint)dispatchParams.Flags; commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray); - ((AsrGenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams, dispatchSrcX, dispatchSrcY); + ((AsrGenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams); } - private void SetupConstants(Asr.DispatchDescription dispatchParams, bool resetAccumulation) + private void SetupConstants(in Asr.DispatchDescription dispatchParams, bool resetAccumulation) { ref Asr.UpscalerConstants constants = ref UpscalerConsts; @@ -331,7 +327,7 @@ namespace ArmASR constants.lumaMipDimensions.y = (int)(constants.maxRenderSize.y / mipDiv); } - private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Asr.DispatchDescription dispatchParams) + private Vector4 SetupDeviceDepthToViewSpaceDepthParams(in Asr.DispatchDescription dispatchParams) { bool inverted = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) != 0; bool infinite = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInfinite) != 0; @@ -364,13 +360,13 @@ namespace ArmASR 1.0f / cotHalfFovY); } - private void SetupRcasConstants(Asr.DispatchDescription dispatchParams) + private void SetupRcasConstants(in Asr.DispatchDescription dispatchParams) { int sharpnessIndex = Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1)); RcasConsts = RcasConfigs[sharpnessIndex]; } - private void SetupSpdConstants(Asr.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount) + private void SetupSpdConstants(in Asr.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount) { RectInt rectInfo = new RectInt(0, 0, dispatchParams.RenderSize.x, dispatchParams.RenderSize.y); SpdSetup(rectInfo, out dispatchThreadGroupCount, out var workGroupOffset, out var numWorkGroupsAndMips); @@ -402,7 +398,7 @@ namespace ArmASR } } - private void 
DebugCheckDispatch(Asr.DispatchDescription dispatchParams) + private void DebugCheckDispatch(in Asr.DispatchDescription dispatchParams) { if (!dispatchParams.Color.IsValid) { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 8fce782..2070b61 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -313,7 +313,7 @@ namespace ArmASR { } - public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) + public void ScheduleDispatch(CommandBuffer commandBuffer, in Asr.GenerateReactiveDescription dispatchParams) { BeginSample(commandBuffer); From ac0adc168b5a305d0e34051d249ac769087468af Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 13:02:17 +0100 Subject: [PATCH 41/88] Added definition for quality preset variants --- .../Effects/Upscaling/ASR/Runtime/Asr.cs | 11 ++++++++++- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 18 ++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index 158c196..88d441e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -33,7 +33,7 @@ namespace ArmASR /// /// Creates a new ASR context with standard parameters that are appropriate for the current platform. 
/// - public static AsrContext CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaders shaders, InitializationFlags flags = 0) + public static AsrContext CreateContext(Variant variant, Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaders shaders, InitializationFlags flags = 0) { if (SystemInfo.usesReversedZBuffer) flags |= InitializationFlags.EnableDepthInverted; @@ -49,6 +49,7 @@ namespace ArmASR var contextDescription = new ContextDescription { Flags = flags, + Variant = variant, DisplaySize = displaySize, MaxRenderSize = maxRenderSize, Shaders = shaders, @@ -172,6 +173,13 @@ namespace ArmASR UnityEngine.Object.Destroy(obj); #endif } + + public enum Variant + { + Quality, // Maintains the same image quality as the original FSR2. + Balanced, // Gives a significant improvement in both bandwidth savings and performance uplift while maintaining close image quality to the 'quality' preset. + Performance, // A more aggressive preset that will give you the highest performance with some quality sacrifices. 
+ } public enum QualityMode { @@ -203,6 +211,7 @@ namespace ArmASR public struct ContextDescription { public InitializationFlags Flags; + public Variant Variant; public Vector2Int MaxRenderSize; public Vector2Int DisplaySize; public AsrShaders Shaders; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 2070b61..d22b383 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -263,12 +263,18 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); - // TODO: these need to vary depending on the ASR preset used (below is for Quality preset) - _mrt[0] = Resources.InternalUpscaled[frameIndex]; // TODO: ColorAndWeight - _mrt[1] = Resources.LockStatus[frameIndex]; - _mrt[2] = Resources.LumaHistory[frameIndex]; - _mrt[3] = dispatchParams.EnableSharpening ? BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; - + if (ContextDescription.Variant == Asr.Variant.Quality) + { + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // TODO: ColorAndWeight + _mrt[1] = Resources.LockStatus[frameIndex]; + _mrt[2] = Resources.LumaHistory[frameIndex]; + _mrt[3] = dispatchParams.EnableSharpening ? 
BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; + } + else + { + // TODO: UpscaledColor, TemporalReactive, LockStatus, Color + } + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); } From 2a88584eb220d3b831271de034ceba1c696e804a Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 13:12:38 +0100 Subject: [PATCH 42/88] Added a quick and dirty upscaler integration and fixed global keyword creation --- .../Runtime/Effects/Upscaling.cs | 7 +- .../Upscaling/ASR/Runtime/AsrKeywords.cs | 14 +- .../Runtime/Effects/Upscaling/ASRUpscaler.cs | 131 ++++++++++++++++++ .../Effects/Upscaling/ASRUpscaler.cs.meta | 11 ++ 4 files changed, 155 insertions(+), 8 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index 49fc4f0..68727e5 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -15,7 +15,9 @@ namespace UnityEngine.Rendering.PostProcessing { [InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2, [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3, - //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR, + [InspectorName("Arm Accuracy Super Resolution (ASR) Quality")] ASR_Quality, + [InspectorName("Arm Accuracy Super Resolution (ASR) Balanced")] ASR_Balanced, + [InspectorName("Arm Accuracy Super Resolution (ASR) Performance")] ASR_Performance, [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Fragment")] SGSR2_2PassFS, 
[InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Compute")] SGSR2_2PassCS, [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 3-Pass Compute")] SGSR2_3PassCS, @@ -182,6 +184,9 @@ namespace UnityEngine.Rendering.PostProcessing { UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(), UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(), + UpscalerType.ASR_Quality when ASRUpscaler.IsSupported => new ASRUpscaler_Quality(), + UpscalerType.ASR_Balanced when ASRUpscaler.IsSupported => new ASRUpscaler_Balanced(), + UpscalerType.ASR_Performance when ASRUpscaler.IsSupported => new ASRUpscaler_Performance(), UpscalerType.SGSR2_2PassFS => new SGSR2Upscaler_2PassFS(), UpscalerType.SGSR2_2PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_2PassCS(), UpscalerType.SGSR2_3PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_3PassCS(), diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs index 673e57f..f80c75e 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs @@ -26,13 +26,13 @@ namespace ArmASR public AsrKeywords() { #if UNITY_2021_2_OR_NEWER - _halfPrecisionKeyword = new GlobalKeyword(OptionHalfPrecision); - _hdrColorInputKeyword = new GlobalKeyword(OptionHdrColorInput); - _lowResMotionVectorsKeyword = new GlobalKeyword(OptionLowResolutionMotionVectors); - _jitteredMotionVectorsKeyword = new GlobalKeyword(OptionJitteredMotionVectors); - _invertedDepthKeyword = new GlobalKeyword(OptionInvertedDepth); - _reprojectUseLutKeyword = new GlobalKeyword(OptionReprojectUseLut); - _applySharpeningKeyword = new GlobalKeyword(OptionApplySharpening); + 
_halfPrecisionKeyword = GlobalKeyword.Create(OptionHalfPrecision); + _hdrColorInputKeyword = GlobalKeyword.Create(OptionHdrColorInput); + _lowResMotionVectorsKeyword = GlobalKeyword.Create(OptionLowResolutionMotionVectors); + _jitteredMotionVectorsKeyword = GlobalKeyword.Create(OptionJitteredMotionVectors); + _invertedDepthKeyword = GlobalKeyword.Create(OptionInvertedDepth); + _reprojectUseLutKeyword = GlobalKeyword.Create(OptionReprojectUseLut); + _applySharpeningKeyword = GlobalKeyword.Create(OptionApplySharpening); #endif } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs new file mode 100644 index 0000000..9833510 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs @@ -0,0 +1,131 @@ +using ArmASR; +using UnityEngine.Experimental.Rendering; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal abstract class ASRUpscaler: Upscaler + { + public static bool IsSupported => SystemInfo.supportsComputeShaders; + + protected abstract Asr.Variant Variant { get; } + + private AsrContext _asrContext; + + private Asr.DispatchDescription _dispatchDescription = new(); + private Asr.GenerateReactiveDescription _genReactiveDescription = new(); + + public override void CreateContext(PostProcessRenderContext context, Upscaling config) + { + // Initialize ASR context + Asr.InitializationFlags flags = 0; + if (context.camera.allowHDR) flags |= Asr.InitializationFlags.EnableHighDynamicRange; + if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Asr.InitializationFlags.EnableAutoExposure; + if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Asr.InitializationFlags.EnableDynamicResolution; + + _asrContext = Asr.CreateContext(Variant, config.UpscaleSize, config.MaxRenderSize, context.resources.asrUpscalerShaders, 
flags); + } + + public override void DestroyContext() + { + base.DestroyContext(); + + if (_asrContext != null) + { + _asrContext.Destroy(); + _asrContext = null; + } + } + + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample("ASR"); + + SetupDispatchDescription(context, config); + + if (config.autoGenerateReactiveMask) + { + SetupAutoReactiveDescription(context, config); + + var scaledRenderSize = _genReactiveDescription.RenderSize; + cmd.GetTemporaryRT(AsrShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + _asrContext.GenerateReactiveMask(_genReactiveDescription, cmd); + _dispatchDescription.Reactive = new ResourceView(AsrShaderIDs.UavAutoReactive); + } + + _asrContext.Dispatch(_dispatchDescription, cmd); + + cmd.EndSample("ASR"); + } + + private void SetupDispatchDescription(PostProcessRenderContext context, Upscaling config) + { + var camera = context.camera; + + // Set up the main ASR dispatch parameters + _dispatchDescription.Color = new ResourceView(context.source); + _dispatchDescription.Depth = new ResourceView(Upscaling.GetDepthTexture(context.camera), RenderTextureSubElement.Depth); + _dispatchDescription.MotionVectors = new ResourceView(BuiltinRenderTextureType.MotionVectors); + _dispatchDescription.Exposure = ResourceView.Unassigned; + _dispatchDescription.Reactive = ResourceView.Unassigned; + _dispatchDescription.TransparencyAndComposition = ResourceView.Unassigned; + + if (config.exposureSource == Upscaling.ExposureSource.Manual && config.exposure != null) _dispatchDescription.Exposure = new ResourceView(config.exposure); + if (config.exposureSource == Upscaling.ExposureSource.Unity) _dispatchDescription.Exposure = new ResourceView(context.autoExposureTexture); + if (config.reactiveMask != null) _dispatchDescription.Reactive = new ResourceView(config.reactiveMask); + if (config.transparencyAndCompositionMask 
!= null) _dispatchDescription.TransparencyAndComposition = new ResourceView(config.transparencyAndCompositionMask); + + var scaledRenderSize = config.GetScaledRenderSize(context.camera); + + _dispatchDescription.Output = new ResourceView(context.destination); + _dispatchDescription.PreExposure = config.preExposure; + _dispatchDescription.EnableSharpening = config.performSharpenPass; + _dispatchDescription.Sharpness = config.sharpness; + _dispatchDescription.JitterOffset = config.JitterOffset; + _dispatchDescription.MotionVectorScale.x = -scaledRenderSize.x; + _dispatchDescription.MotionVectorScale.y = -scaledRenderSize.y; + _dispatchDescription.RenderSize = scaledRenderSize; + _dispatchDescription.InputResourceSize = scaledRenderSize; + _dispatchDescription.FrameTimeDelta = Time.unscaledDeltaTime; + _dispatchDescription.CameraNear = camera.nearClipPlane; + _dispatchDescription.CameraFar = camera.farClipPlane; + _dispatchDescription.CameraFovAngleVertical = camera.fieldOfView * Mathf.Deg2Rad; + _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity + _dispatchDescription.Reset = config.Reset; + + if (SystemInfo.usesReversedZBuffer) + { + // Swap the near and far clip plane distances as FSR2 expects this when using inverted depth + (_dispatchDescription.CameraNear, _dispatchDescription.CameraFar) = (_dispatchDescription.CameraFar, _dispatchDescription.CameraNear); + } + } + + private void SetupAutoReactiveDescription(PostProcessRenderContext context, Upscaling config) + { + // Set up the parameters to auto-generate a reactive mask + _genReactiveDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly); + _genReactiveDescription.ColorPreUpscale = new ResourceView(context.source); + _genReactiveDescription.OutReactive = new ResourceView(AsrShaderIDs.UavAutoReactive); + _genReactiveDescription.RenderSize = config.GetScaledRenderSize(context.camera); + _genReactiveDescription.Scale = config.generateReactiveParameters.scale; + 
_genReactiveDescription.CutoffThreshold = config.generateReactiveParameters.cutoffThreshold; + _genReactiveDescription.BinaryValue = config.generateReactiveParameters.binaryValue; + _genReactiveDescription.Flags = (Asr.GenerateReactiveFlags)config.generateReactiveParameters.flags; + } + } + + internal class ASRUpscaler_Quality : ASRUpscaler + { + protected override Asr.Variant Variant => Asr.Variant.Quality; + } + + internal class ASRUpscaler_Balanced : ASRUpscaler + { + protected override Asr.Variant Variant => Asr.Variant.Balanced; + } + + internal class ASRUpscaler_Performance : ASRUpscaler + { + protected override Asr.Variant Variant => Asr.Variant.Performance; + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta new file mode 100644 index 0000000..a6f3e44 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: efc2b52b928eb184da8528721dc1503b +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 3ef31fac65cb6269154332a733608cf1f9f1d8d6 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 14:09:37 +0100 Subject: [PATCH 43/88] Some cleanup and minor fixes --- .../Upscaling/ASR/Runtime/AsrContext.cs | 38 +++---------------- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 5 +-- .../Runtime/Effects/Upscaling/ASRUpscaler.cs | 1 + 3 files changed, 8 insertions(+), 36 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 4857108..a63ac5c 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -36,7 +36,6 @@ namespace ArmASR private const int MaxQueuedFrames = 16; private Asr.ContextDescription _contextDescription; - private CommandBuffer _commandBuffer; private AsrPass _computeLuminancePyramidPass; private AsrPass _reconstructPreviousDepthPass; @@ -48,6 +47,7 @@ namespace ArmASR private AsrPass _tcrAutogeneratePass; private readonly AsrResources _resources = new AsrResources(); + private readonly AsrKeywords _keywords = new AsrKeywords(); private ComputeBuffer _upscalerConstantsBuffer; private readonly Asr.UpscalerConstants[] _upscalerConstantsArray = { new Asr.UpscalerConstants() }; @@ -65,8 +65,6 @@ namespace ArmASR private readonly Asr.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Asr.GenerateReactiveConstants() }; private ref Asr.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0]; - private AsrKeywords _keywords = new(); - private bool _firstExecution; private Vector2 _previousJitterOffset; private int _resourceFrameIndex; @@ -74,7 +72,6 @@ namespace ArmASR public void Create(in Asr.ContextDescription contextDescription) { _contextDescription = contextDescription; - _commandBuffer = new CommandBuffer { name = "ASR" }; _upscalerConstantsBuffer = CreateConstantBuffer(); _spdConstantsBuffer = CreateConstantBuffer(); @@ -119,19 +116,6 @@ namespace ArmASR DestroyConstantBuffer(ref _rcasConstantsBuffer); DestroyConstantBuffer(ref _spdConstantsBuffer); DestroyConstantBuffer(ref _upscalerConstantsBuffer); - - if (_commandBuffer != null) - { - _commandBuffer.Dispose(); - _commandBuffer = null; - } - } - - public void Dispatch(in Asr.DispatchDescription dispatchParams) - { - _commandBuffer.Clear(); - Dispatch(dispatchParams, _commandBuffer); - 
Graphics.ExecuteCommandBuffer(_commandBuffer); } public void Dispatch(Asr.DispatchDescription dispatchParams, CommandBuffer commandBuffer) @@ -174,8 +158,6 @@ namespace ArmASR const int threadGroupWorkRegionDim = 8; int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchDstX = (_contextDescription.DisplaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - int dispatchDstY = (_contextDescription.DisplaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; // Clear reconstructed depth for max depth store if (resetAccumulation) @@ -214,16 +196,16 @@ namespace ArmASR _computeLuminancePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y); // Reconstruct previous depth - _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex); // Depth clip - _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex); // Create locks _lockPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); // Accumulate - _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); + _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex); if (dispatchParams.EnableSharpening) { @@ -232,10 +214,7 @@ namespace ArmASR commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray); // Dispatch RCAS - const int threadGroupWorkRegionDimRcas = 16; - int threadGroupsX = (_contextDescription.DisplaySize.x + threadGroupWorkRegionDimRcas - 1) / 
threadGroupWorkRegionDimRcas; - int threadGroupsY = (_contextDescription.DisplaySize.y + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; - _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY); + _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex); } _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames; @@ -245,13 +224,6 @@ namespace ArmASR commandBuffer.DisableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); } - public void GenerateReactiveMask(in Asr.GenerateReactiveDescription dispatchParams) - { - _commandBuffer.Clear(); - GenerateReactiveMask(dispatchParams, _commandBuffer); - Graphics.ExecuteCommandBuffer(_commandBuffer); - } - public void GenerateReactiveMask(in Asr.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) { GenReactiveConsts.scale = dispatchParams.Scale; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index d22b383..0fc44f2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -19,7 +19,6 @@ // THE SOFTWARE. using System; -using System.Collections.Generic; using System.Diagnostics; using System.Runtime.CompilerServices; using UnityEngine; @@ -35,7 +34,7 @@ namespace ArmASR /// internal abstract class AsrPass: IDisposable { - internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define + internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define // TODO: still relevant? 
protected readonly Asr.ContextDescription ContextDescription; protected readonly AsrResources Resources; @@ -66,7 +65,7 @@ namespace ArmASR } } - public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX = 0, int dispatchY = 0) { BeginSample(commandBuffer); DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs index 9833510..47107cc 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs @@ -49,6 +49,7 @@ namespace UnityEngine.Rendering.PostProcessing var scaledRenderSize = _genReactiveDescription.RenderSize; cmd.GetTemporaryRT(AsrShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + _genReactiveDescription.OutReactive = new ResourceView(AsrShaderIDs.UavAutoReactive); _asrContext.GenerateReactiveMask(_genReactiveDescription, cmd); _dispatchDescription.Reactive = new ResourceView(AsrShaderIDs.UavAutoReactive); } From 20a930097ef9780b736b49b90fe1969b85a48fb8 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 15:24:59 +0100 Subject: [PATCH 44/88] Added keywords for balanced/performance presets and GLES support --- .../Upscaling/ASR/Runtime/AsrContext.cs | 2 +- .../Upscaling/ASR/Runtime/AsrKeywords.cs | 40 ++++++++++++++++++- .../ASR/Shaders/ffxm_fsr2_common.cginc | 5 +++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index a63ac5c..1424b56 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -128,7 +128,7 @@ namespace ArmASR if (dispatchParams.UseTextureArrays) commandBuffer.EnableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); - _keywords.ApplyKeywords(commandBuffer, _contextDescription.Flags, dispatchParams); + _keywords.ApplyKeywords(commandBuffer, _contextDescription.Variant, _contextDescription.Flags, dispatchParams); if (_firstExecution) { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs index f80c75e..43de5e1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs @@ -12,6 +12,8 @@ namespace ArmASR private static readonly string OptionInvertedDepth = "FFXM_FSR2_OPTION_INVERTED_DEPTH"; private static readonly string OptionReprojectUseLut = "FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"; private static readonly string OptionApplySharpening = "FFXM_FSR2_OPTION_APPLY_SHARPENING"; + private static readonly string OptionBalancedPreset = "FFXM_FSR2_OPTION_SHADER_OPT_BALANCED"; + private static readonly string OptionPerformancePreset = "FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE"; #if UNITY_2021_2_OR_NEWER private readonly GlobalKeyword _halfPrecisionKeyword; @@ -21,6 +23,8 @@ namespace ArmASR private readonly GlobalKeyword 
_invertedDepthKeyword; private readonly GlobalKeyword _reprojectUseLutKeyword; private readonly GlobalKeyword _applySharpeningKeyword; + private readonly GlobalKeyword _balancedPresetKeyword; + private readonly GlobalKeyword _performancePresetKeyword; #endif public AsrKeywords() @@ -33,10 +37,12 @@ namespace ArmASR _invertedDepthKeyword = GlobalKeyword.Create(OptionInvertedDepth); _reprojectUseLutKeyword = GlobalKeyword.Create(OptionReprojectUseLut); _applySharpeningKeyword = GlobalKeyword.Create(OptionApplySharpening); + _balancedPresetKeyword = GlobalKeyword.Create(OptionBalancedPreset); + _performancePresetKeyword = GlobalKeyword.Create(OptionPerformancePreset); #endif } - public void ApplyKeywords(CommandBuffer commandBuffer, Asr.InitializationFlags initFlags, in Asr.DispatchDescription dispatchParams) + public void ApplyKeywords(CommandBuffer commandBuffer, Asr.Variant variant, Asr.InitializationFlags initFlags, in Asr.DispatchDescription dispatchParams) { bool useLut = false; #if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ @@ -82,6 +88,22 @@ namespace ArmASR commandBuffer.EnableKeyword(_applySharpeningKeyword); else commandBuffer.DisableKeyword(_applySharpeningKeyword); + + switch (variant) + { + case Asr.Variant.Quality: + commandBuffer.DisableKeyword(_balancedPresetKeyword); + commandBuffer.DisableKeyword(_performancePresetKeyword); + break; + case Asr.Variant.Balanced: + commandBuffer.EnableKeyword(_balancedPresetKeyword); + commandBuffer.DisableKeyword(_performancePresetKeyword); + break; + case Asr.Variant.Performance: + commandBuffer.EnableKeyword(_balancedPresetKeyword); + commandBuffer.EnableKeyword(_performancePresetKeyword); + break; + } #else if ((initFlags & Asr.InitializationFlags.EnableFP16Usage) != 0) commandBuffer.EnableShaderKeyword(OptionHalfPrecision); @@ -117,6 +139,22 @@ namespace ArmASR commandBuffer.EnableShaderKeyword(OptionApplySharpening); else commandBuffer.DisableShaderKeyword(OptionApplySharpening); + 
+ switch (variant) + { + case Asr.Variant.Quality: + commandBuffer.DisableShaderKeyword(OptionBalancedPreset); + commandBuffer.DisableShaderKeyword(OptionPerformancePreset); + break; + case Asr.Variant.Balanced: + commandBuffer.EnableShaderKeyword(OptionBalancedPreset); + commandBuffer.DisableShaderKeyword(OptionPerformancePreset); + break; + case Asr.Variant.Performance: + commandBuffer.EnableShaderKeyword(OptionBalancedPreset); + commandBuffer.EnableShaderKeyword(OptionPerformancePreset); + break; + } #endif } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 4d8ce66..a108327 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -3,6 +3,11 @@ #pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible +// ASR has some special code paths for OpenGL ES 3.2 +#if defined(SHADER_API_GLES3) +#define FFXM_SHADER_PLATFORM_GLES_3_2 (1) +#endif + // Work around the lack of texture atomics on Metal #if defined(SHADER_API_METAL) #define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); } From d71b061d16a02035173ba5b57968c482ad028d26 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 16:08:39 +0100 Subject: [PATCH 45/88] Added new shader IDs and resource definitions --- .../Upscaling/ASR/Runtime/AsrResources.cs | 51 +++++++++++++++---- .../Upscaling/ASR/Runtime/AsrShaderIDs.cs | 15 ++++++ 2 files changed, 56 insertions(+), 10 deletions(-) diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index a879238..5fedade 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -41,9 +41,10 @@ namespace ArmASR public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2]; public readonly RenderTexture[] LockStatus = new RenderTexture[2]; public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; + public readonly RenderTexture[] InternalReactive = new RenderTexture[2]; public readonly RenderTexture[] LumaHistory = new RenderTexture[2]; - public void Create(Asr.ContextDescription contextDescription) + public void Create(in Asr.ContextDescription contextDescription) { // Generate the data for the LUT const int lanczos2LutWidth = 128; @@ -61,6 +62,8 @@ namespace ArmASR maximumBias[i] = MaximumBias[i] / 2.0f; } + GetFormatRequirements(contextDescription, out bool isBalancedOrPerformance, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format); + // Resource FSR2_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. 
LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "ASR_LanczosLutData" }; @@ -88,14 +91,14 @@ namespace ArmASR SpdAtomicCounter.Create(); // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE - AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "ASR_AutoExposure", enableRandomWrite = true }; + AutoExposure = new RenderTexture(1, 1, 0, rg16Format) { name = "ASR_AutoExposure", enableRandomWrite = true }; AutoExposure.Create(); // Resource FSR2_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f)); - SceneLuminance = new RenderTexture(w, h, 0, GraphicsFormat.R16_SFloat, mipCount) { name = "ASR_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; + SceneLuminance = new RenderTexture(w, h, 0, r16Format, mipCount) { name = "ASR_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; SceneLuminance.Create(); // Resources FSR2_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE @@ -105,18 +108,29 @@ namespace ArmASR CreateDoubleBufferedResource(LockStatus, "ASR_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat); // Resources FSR2_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(InternalUpscaled, "ASR_InternalUpscaled", 
contextDescription.DisplaySize, GraphicsFormat.R16G16B16A16_SFloat); - - // Resources FSR2_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(LumaHistory, "ASR_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm); + CreateDoubleBufferedResource(InternalUpscaled, "ASR_InternalUpscaled", contextDescription.DisplaySize, !isBalancedOrPerformance ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.B10G11R11_UFloatPack32); + + // Additional textures used by either balanced or performance presets + if (isBalancedOrPerformance) + { + // Resources FSR2_InternalReactive1/2: FFXM_RESOURCE_USAGE_RENDERTARGET, FFXM_SURFACE_FORMAT_R8_SNORM, FFXM_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(InternalReactive, "ASR_InternalReactive", contextDescription.DisplaySize, GraphicsFormat.R8_SNorm); // TODO: R8_SNorm *might* be a problem? + } + else // Quality preset specific + { + // Resources FSR2_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LumaHistory, "ASR_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm); + } } // Set up shared aliasable resources, i.e. 
temporary render textures // These do not need to persist between frames, but they do need to be available between passes - public static void CreateAliasableResources(CommandBuffer commandBuffer, Asr.ContextDescription contextDescription, Asr.DispatchDescription dispatchParams) + public static void CreateAliasableResources(CommandBuffer commandBuffer, in Asr.ContextDescription contextDescription, in Asr.DispatchDescription dispatchParams) { Vector2Int displaySize = contextDescription.DisplaySize; Vector2Int maxRenderSize = contextDescription.MaxRenderSize; + + GetFormatRequirements(contextDescription, out bool isBalancedOrPerformance, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format); // FSR2_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); @@ -131,10 +145,26 @@ namespace ArmASR commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16B16A16_SFloat, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? 
GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1, true); // FSR2_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, r8Format, 1, true); + } + + private static void GetFormatRequirements(in Asr.ContextDescription contextDescription, + out bool isBalancedOrPerformance, out bool preparedInputColorNeedsFP16, + out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format) + { + bool applyPerfModeOptimizations = contextDescription.Variant == Asr.Variant.Performance; + bool applyBalancedModeOptimizations = contextDescription.Variant == Asr.Variant.Balanced; + isBalancedOrPerformance = applyBalancedModeOptimizations || applyPerfModeOptimizations; + preparedInputColorNeedsFP16 = !applyPerfModeOptimizations; + + // OpenGLES 3.2 specific: We need to work around some GLES limitations for some resources. + bool isOpenGLES = SystemInfo.graphicsDeviceType == GraphicsDeviceType.OpenGLES3; + r8Format = isOpenGLES ? GraphicsFormat.R32_SFloat : GraphicsFormat.R8_UNorm; + r16Format = isOpenGLES ? GraphicsFormat.R32_SFloat : GraphicsFormat.R16_SFloat; + rg16Format = isOpenGLES ? 
GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R16G16_SFloat; } public static void DestroyAliasableResources(CommandBuffer commandBuffer) @@ -160,6 +190,7 @@ namespace ArmASR public void Destroy() { DestroyResource(LumaHistory); + DestroyResource(InternalReactive); DestroyResource(InternalUpscaled); DestroyResource(LockStatus); DestroyResource(DilatedMotionVectors); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs index 8f829bb..64af0eb 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs @@ -38,7 +38,9 @@ namespace ArmASR public static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors"); public static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilatedDepth"); public static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color"); + public static readonly int SrvInternalTemporalReactive = Shader.PropertyToID("r_internal_temporal_reactive"); public static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status"); + public static readonly int SrvNewLocks = Shader.PropertyToID("r_new_locks"); public static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma"); public static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color"); public static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history"); @@ -65,6 +67,19 @@ namespace ArmASR public static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure"); public static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic"); public static readonly int UavAutoReactive = 
Shader.PropertyToID("rw_output_autoreactive"); + + // Render textures, i.e. output targets for fragment shaders + public static readonly int RtInternalUpscalerColor = Shader.PropertyToID("rw_internal_upscaled_color"); + public static readonly int RtInternalTemporalReactive = Shader.PropertyToID("rw_internal_temporal_reactive"); + public static readonly int RtLockStatus = Shader.PropertyToID("rw_lock_status"); + public static readonly int RtLumaHistory = Shader.PropertyToID("rw_luma_history"); + public static readonly int RtUpscaledOutput = Shader.PropertyToID("rw_upscaled_output"); + public static readonly int RtDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks"); + public static readonly int RtPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color"); + public static readonly int RtDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors"); + public static readonly int RtDilatedDepth = Shader.PropertyToID("rw_dilatedDepth"); + public static readonly int RtLockInputLuma = Shader.PropertyToID("rw_lock_input_luma"); + public static readonly int RtAutoReactive = Shader.PropertyToID("rw_output_autoreactive"); // Constant buffer bindings public static readonly int CbFsr2 = Shader.PropertyToID("cbFSR2"); From 82734c7606625ffda7b463281e58d3a6b978fdfd Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 17:20:44 +0100 Subject: [PATCH 46/88] Went over all the shader inputs and output and implemented them. Realized that some fragment shaders use UAVs with random write, is that even possible in Unity? 
--- .../Upscaling/ASR/Runtime/AsrContext.cs | 15 ++++++- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 39 ++++++++++++++----- .../Upscaling/ASR/Runtime/AsrResources.cs | 4 +- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h | 2 +- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 1424b56..1ef2b01 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -130,12 +130,16 @@ namespace ArmASR _keywords.ApplyKeywords(commandBuffer, _contextDescription.Variant, _contextDescription.Flags, dispatchParams); + AsrResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); + if (_firstExecution) { commandBuffer.SetRenderTarget(_resources.LockStatus[0]); commandBuffer.ClearRenderTarget(false, true, Color.clear); commandBuffer.SetRenderTarget(_resources.LockStatus[1]); commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(AsrShaderIDs.UavPreparedInputColor); + commandBuffer.ClearRenderTarget(false, true, Color.clear); } int frameIndex = _resourceFrameIndex % 2; @@ -150,7 +154,6 @@ namespace ArmASR if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive); if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive); - AsrResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); SetupConstants(dispatchParams, resetAccumulation); @@ -158,6 +161,10 @@ namespace ArmASR const int threadGroupWorkRegionDim = 8; int dispatchSrcX = 
(UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + bool applyPerfModeOptimizations = _contextDescription.Variant == Asr.Variant.Performance; + bool applyBalancedModeOptimizations = _contextDescription.Variant == Asr.Variant.Balanced; + bool isBalancedOrPerformance = applyBalancedModeOptimizations || applyPerfModeOptimizations; // Clear reconstructed depth for max depth store if (resetAccumulation) @@ -167,6 +174,12 @@ namespace ArmASR commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]); commandBuffer.ClearRenderTarget(false, true, Color.clear); + + if (isBalancedOrPerformance) + { + commandBuffer.SetRenderTarget(_resources.InternalReactive[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } commandBuffer.SetRenderTarget(_resources.SceneLuminance); commandBuffer.ClearRenderTarget(false, true, Color.clear); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 0fc44f2..ba503ee 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -34,7 +34,7 @@ namespace ArmASR /// internal abstract class AsrPass: IDisposable { - internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define // TODO: still relevant? 
+ internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define protected readonly Asr.ContextDescription ContextDescription; protected readonly AsrResources Resources; @@ -162,6 +162,8 @@ namespace ArmASR internal class AsrReconstructPreviousDepthPass : AsrPass { + private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[3]; + public AsrReconstructPreviousDepthPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { @@ -175,8 +177,15 @@ namespace ArmASR commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); + // TODO UAVs in fragment shaders? That seems like it might be a problem... + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); + + _mrt[0] = AsrShaderIDs.RtDilatedDepth; // fDepth + _mrt[1] = Resources.DilatedMotionVectors[frameIndex]; // fMotionVector + _mrt[2] = AsrShaderIDs.RtLockInputLuma; // fLuma + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); - BlitFragment(commandBuffer, Resources.DilatedMotionVectors[frameIndex]); + BlitFragment(commandBuffer, _mrt); } } @@ -204,8 +213,8 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); - _mrt[0] = AsrShaderIDs.UavDilatedReactiveMasks; - _mrt[1] = BuiltinRenderTextureType.None; // TODO: Tonemapped + _mrt[0] = AsrShaderIDs.RtDilatedReactiveMasks; // fDilatedReactiveMasks + _mrt[1] = AsrShaderIDs.RtPreparedInputColor; // fTonemapped FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); 
BlitFragment(commandBuffer, _mrt); @@ -223,6 +232,10 @@ namespace ArmASR protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLockInputLuma, AsrShaderIDs.UavLockInputLuma); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavNewLocks, AsrShaderIDs.UavNewLocks); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); @@ -261,19 +274,26 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); + + // TODO UAVs in fragment shaders? That seems like it might be a problem... + commandBuffer.SetGlobalTexture(AsrShaderIDs.UavNewLocks, AsrShaderIDs.UavNewLocks); if (ContextDescription.Variant == Asr.Variant.Quality) { - _mrt[0] = Resources.InternalUpscaled[frameIndex]; // TODO: ColorAndWeight - _mrt[1] = Resources.LockStatus[frameIndex]; - _mrt[2] = Resources.LumaHistory[frameIndex]; - _mrt[3] = dispatchParams.EnableSharpening ? 
BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fColorAndWeight + _mrt[1] = Resources.LockStatus[frameIndex]; // fLockStatus + _mrt[2] = Resources.LumaHistory[frameIndex]; // fLumaHistory } else { - // TODO: UpscaledColor, TemporalReactive, LockStatus, Color + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fUpscaledColor + _mrt[1] = Resources.InternalReactive[frameIndex]; // fTemporalReactive + _mrt[2] = Resources.LockStatus[frameIndex]; // fLockStatus } + _mrt[3] = dispatchParams.EnableSharpening ? BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; // fColor + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); } @@ -325,6 +345,7 @@ namespace ArmASR commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); BlitFragment(commandBuffer, dispatchParams.OutReactive.RenderTarget); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 5fedade..c2eeb65 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -62,7 +62,7 @@ namespace ArmASR maximumBias[i] = MaximumBias[i] / 2.0f; } - GetFormatRequirements(contextDescription, out 
bool isBalancedOrPerformance, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format); + GetFormatRequirements(contextDescription, out bool isBalancedOrPerformance, out _, out _, out GraphicsFormat r16Format, out GraphicsFormat rg16Format); // Resource FSR2_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. @@ -130,7 +130,7 @@ namespace ArmASR Vector2Int displaySize = contextDescription.DisplaySize; Vector2Int maxRenderSize = contextDescription.MaxRenderSize; - GetFormatRequirements(contextDescription, out bool isBalancedOrPerformance, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format); + GetFormatRequirements(contextDescription, out _, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out _, out _); // FSR2_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h index b78afdc..1efc9e1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h @@ -125,7 +125,7 @@ void ComputeLock(FfxInt32x2 iPxLrPos) 
StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); } - ClearResourcesForNextFrame(iPxLrPos); + //ClearResourcesForNextFrame(iPxLrPos); } #endif // FFXM_FSR2_LOCK_H From c4df3d45aaee9ba2b78e572e9a1fdf064b39601f Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 17:49:16 +0100 Subject: [PATCH 47/88] Got UAV bindings in fragment shaders to work in what I think is the correct way --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 10 ++++++---- .../Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 2 +- .../ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index ba503ee..c3a74e4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -177,8 +177,8 @@ namespace ArmASR commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); - // TODO UAVs in fragment shaders? That seems like it might be a problem... 
- commandBuffer.SetGlobalTexture(AsrShaderIDs.UavReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); + // UAV binding in fragment shader, index needs to match the register binding in HLSL + commandBuffer.SetRandomWriteTarget(3, AsrShaderIDs.UavReconstructedPrevNearestDepth); _mrt[0] = AsrShaderIDs.RtDilatedDepth; // fDepth _mrt[1] = Resources.DilatedMotionVectors[frameIndex]; // fMotionVector @@ -186,6 +186,7 @@ namespace ArmASR FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); + commandBuffer.ClearRandomWriteTargets(); } } @@ -276,8 +277,8 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); - // TODO UAVs in fragment shaders? That seems like it might be a problem... - commandBuffer.SetGlobalTexture(AsrShaderIDs.UavNewLocks, AsrShaderIDs.UavNewLocks); + // UAV binding in fragment shader, index needs to match the register binding in HLSL + commandBuffer.SetRandomWriteTarget(4, AsrShaderIDs.UavNewLocks); if (ContextDescription.Variant == Asr.Variant.Quality) { @@ -296,6 +297,7 @@ namespace ArmASR FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); + commandBuffer.ClearRandomWriteTargets(); } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 7f78bbb..49dbed7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -36,7 +36,7 @@ #define FSR2_BIND_SRV_LUMA_HISTORY 10 #define FSR2_BIND_SRV_TEMPORAL_REACTIVE 11 -#define FSR2_BIND_UAV_NEW_LOCKS 12 +#define FSR2_BIND_UAV_NEW_LOCKS 4 #define FSR2_BIND_CB_FSR2 0 diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index 5a71c68..ff65021 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -24,7 +24,7 @@ #define FSR2_BIND_SRV_INPUT_COLOR 2 #define FSR2_BIND_SRV_INPUT_EXPOSURE 3 -#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 4 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 3 #define FSR2_BIND_CB_FSR2 0 From 943d11970695d501e25137cd8aee57eead0515c9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 23:02:44 +0100 Subject: [PATCH 48/88] Fixed a number of issues to make Quality variant work: - Removed "fix" to initialize params to 0, as that happens midway through the Accumulation process, resetting a lot of variables. Instead rewrote InitParams to take an inout parameter and clear the params first thing in the Accumulate function. - Bind first multi-render target as depth target, fixes RenderDoc errors about color/depth buffer size mismatch. - Ensure keywords that need a value are redefined using the correct value. - Removed obsolete auto-TCR constants struct. 
--- .../Effects/Upscaling/ASR/Runtime/Asr.cs | 10 +--------- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 2 +- .../ASR/Shaders/ffxm_fsr2_common.cginc | 17 +++++++++++++++++ .../Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h | 12 +++--------- .../fsr2/ffxm_fsr2_postprocess_lock_status.h | 2 +- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index 88d441e..1074a9c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -300,6 +300,7 @@ namespace ArmASR public float deltaTime; public float dynamicResChangeFactor; public float viewSpaceToMetersFactor; + public float padding; } @@ -320,15 +321,6 @@ namespace ArmASR public float binaryValue; public uint flags; } - - [Serializable, StructLayout(LayoutKind.Sequential)] - internal struct GenerateReactiveConstants2 - { - public float autoTcThreshold; - public float autoTcScale; - public float autoReactiveScale; - public float autoReactiveMax; - } [Serializable, StructLayout(LayoutKind.Sequential)] internal struct RcasConstants diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index c3a74e4..a413f46 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -109,7 +109,7 @@ namespace ArmASR [MethodImpl(MethodImplOptions.AggressiveInlining)] protected void BlitFragment(CommandBuffer commandBuffer, 
RenderTargetIdentifier[] renderTargets) { - commandBuffer.SetRenderTarget(renderTargets, BuiltinRenderTextureType.None); + commandBuffer.SetRenderTarget(renderTargets, renderTargets[0]); BlitFragment(commandBuffer); } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index a108327..895e4cf 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -3,9 +3,26 @@ #pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible +// If these keywords are set by Unity, redefine them to have a truthy value +#ifdef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE +#undef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE +#define FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE 1 +#endif +#ifdef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED +#undef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED +#define FFXM_FSR2_OPTION_SHADER_OPT_BALANCED 1 +#endif + +// Ensure the correct value is defined for this keyword, as it is used to select one of multiple sampler functions +#ifdef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#undef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 +#endif + // ASR has some special code paths for OpenGL ES 3.2 #if defined(SHADER_API_GLES3) #define FFXM_SHADER_PLATFORM_GLES_3_2 (1) +#define unorm #endif // Work around the lack of texture atomics on Metal diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h index cb2ab5f..2b28504 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -244,8 +244,6 @@ FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams para void initReactiveMaskFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) { - params = (AccumulationPassCommonParams)0; - const FFXM_MIN16_F2 fDilatedReactiveMasks = FFXM_MIN16_F2(SampleDilatedReactiveMasks(params.fLrUv_HwSampler)); params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; params.fAccumulationMask = fDilatedReactiveMasks.y; @@ -262,11 +260,8 @@ void initIsNewSample(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) params.bIsNewSample = (params.bIsExistingSample == false || bIsResetFrame); } - -AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) +void InitParams(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params, FfxInt32x2 iPxHrPos) { - AccumulationPassCommonParams params; - params.iPxHrPos = iPxHrPos; const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); params.fHrUv = fHrUv; @@ -278,13 +273,12 @@ AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); - - return params; } AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos) { - AccumulationPassCommonParams params = InitParams(iPxHrPos); + AccumulationPassCommonParams params = (AccumulationPassCommonParams)0; + InitParams(params, iPxHrPos); FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); FFXM_MIN16_F2 fLockStatus; diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h index 8d6e0bf..eab63d3 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h @@ -56,7 +56,7 @@ FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) return fShadingChangeLuma; } -void UpdateLockStatus(AccumulationPassCommonParams params, +void UpdateLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, FFXM_PARAMETER_INOUT FfxFloat32x2 fLockStatus, FFXM_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, From c92e35ff73f1a332df3ac0726822d6fafd904a67 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 23:09:28 +0100 Subject: [PATCH 49/88] Added balanced and performance preset keywords to the depth clip and accumulate passes, which seem to be the only ones that use it. Makes the Balanced variant work. Performance variant still problematic. 
--- .../Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader index 9066d1e..5be5aae 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -59,6 +59,8 @@ Shader "TND/ASR/ffx_fsr2_fs" //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH @@ -82,6 +84,8 @@ Shader "TND/ASR/ffx_fsr2_fs" //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS From 4a0fc42c2a195ac87ac8eb8db97fc88d1daa1419 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sat, 22 Mar 2025 23:13:57 +0100 Subject: [PATCH 50/88] Fixed RCAS pass still using compute shader bindings --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index a413f46..7c7d191 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -344,10 +344,9 @@ namespace ArmASR { BeginSample(commandBuffer); - commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); - commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); + commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); - FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); BlitFragment(commandBuffer, dispatchParams.OutReactive.RenderTarget); From 389faf3b65564b2fadde441b73758a65bb224d9c Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 23 Mar 2025 13:58:54 +0100 Subject: [PATCH 51/88] Minor fixes (clearing auto-exposure to 1e8f achieves faster convergence but might cause issues with OpenGL on Nvidia, keep an eye on this!) 
--- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs | 2 +- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 1ef2b01..9bd1cc0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -186,7 +186,7 @@ namespace ArmASR // Auto exposure always used to track luma changes in locking logic commandBuffer.SetRenderTarget(_resources.AutoExposure); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1f, 0f, 0f)); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index c2eeb65..1d9fd89 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -114,7 +114,7 @@ namespace ArmASR if (isBalancedOrPerformance) { // Resources FSR2_InternalReactive1/2: FFXM_RESOURCE_USAGE_RENDERTARGET, FFXM_SURFACE_FORMAT_R8_SNORM, FFXM_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(InternalReactive, "ASR_InternalReactive", contextDescription.DisplaySize, GraphicsFormat.R8_SNorm); // TODO: R8_SNorm *might* be a problem? 
+ CreateDoubleBufferedResource(InternalReactive, "ASR_InternalReactive", contextDescription.DisplaySize, GraphicsFormat.R8_SNorm); } else // Quality preset specific { @@ -169,7 +169,7 @@ namespace ArmASR public static void DestroyAliasableResources(CommandBuffer commandBuffer) { - // Release all of the aliasable resources used this frame + // Release all the aliasable resources used this frame commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth); commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavDilatedDepth); commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavLockInputLuma); From 9ef2bfdef0d02ec0e15e81ed196d1ebf8088a4e7 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 23 Mar 2025 18:56:02 +0100 Subject: [PATCH 52/88] Reverted rework of accumulation parameter initialization function to what it originally was, but with a struct clear to zero. --- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h index 2b28504..1120019 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -260,8 +260,11 @@ void initIsNewSample(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) params.bIsNewSample = (params.bIsExistingSample == false || bIsResetFrame); } -void InitParams(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params, FfxInt32x2 iPxHrPos) + +AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) { + AccumulationPassCommonParams params = (AccumulationPassCommonParams)0; + 
params.iPxHrPos = iPxHrPos; const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); params.fHrUv = fHrUv; @@ -273,12 +276,13 @@ void InitParams(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params, FfxInt params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); + + return params; } AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos) { - AccumulationPassCommonParams params = (AccumulationPassCommonParams)0; - InitParams(params, iPxHrPos); + AccumulationPassCommonParams params = InitParams(iPxHrPos); FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); FFXM_MIN16_F2 fLockStatus; From 4c93edccded49790a5751ba81e2f8a9a478145c9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 23 Mar 2025 20:23:07 +0100 Subject: [PATCH 53/88] Added type definitions for PSSL (PS4/5) and removed hard-coded FFXM_HALF defines. Let the multi-compile shaders and C# code control that. --- .../ASR/Shaders/shaders/ffxm_common_types.h | 105 ++++++++++++++++-- .../shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 3 - .../ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 3 - ...m_fsr2_compute_luminance_pyramid_pass.hlsl | 3 - .../shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 3 - .../Shaders/shaders/ffxm_fsr2_lock_pass.hlsl | 3 - .../shaders/ffxm_fsr2_rcas_pass_fs.hlsl | 3 - ...r2_reconstruct_previous_depth_pass_fs.hlsl | 3 - .../ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl | 3 - 9 files changed, 98 insertions(+), 31 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h index fb5e13a..be29d6c 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h +++
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h @@ -240,11 +240,6 @@ typedef int4 FfxInt32x4; #endif // #if defined(FFXM_HLSL_6_2) -// Arm ASR relies in efficient FP16 arithmetic. -#if !defined(FFXM_HALF) -#define FFXM_HALF (1) -#endif - #if FFXM_HALF #if FFXM_HLSL_6_2 @@ -269,6 +264,29 @@ typedef int16_t FfxInt16; typedef int16_t2 FfxInt16x2; typedef int16_t3 FfxInt16x3; typedef int16_t4 FfxInt16x4; +#elif SHADER_API_PSSL +#pragma argument(realtypes) // Enable true 16-bit types + +typedef half FfxFloat16; +typedef half2 FfxFloat16x2; +typedef half3 FfxFloat16x3; +typedef half4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup GPU +typedef ushort FfxUInt16; +typedef ushort2 FfxUInt16x2; +typedef ushort3 FfxUInt16x3; +typedef ushort4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup GPU +typedef short FfxInt16; +typedef short2 FfxInt16x2; +typedef short3 FfxInt16x3; +typedef short4 FfxInt16x4; #else // #if FFXM_HLSL_6_2 typedef min16float FfxFloat16; typedef min16float2 FfxFloat16x2; @@ -340,7 +358,7 @@ typedef min16int4 FfxInt16x4; #endif // #if defined(FFXM_GLSL) -#if FFXM_HALF +#if FFXM_HALF && !defined(SHADER_API_PSSL) #if FFXM_HLSL_6_2 @@ -378,7 +396,7 @@ typedef min16int4 FfxInt16x4; #if defined(FFXM_GPU) // Common typedefs: -#if defined(FFXM_HLSL) +#if defined(FFXM_HLSL) && !defined(SHADER_API_PSSL) FFXM_MIN16_SCALAR( FFXM_MIN16_F , float ); FFXM_MIN16_VECTOR( FFXM_MIN16_F2, float, 2 ); FFXM_MIN16_VECTOR( FFXM_MIN16_F3, float, 3 ); @@ -452,6 +470,79 @@ typedef FfxUInt32x3 Prefix##_U3; \ typedef FfxUInt32x4 Prefix##_U4; #endif // #if defined(FFXM_HLSL) +#if defined(SHADER_API_PSSL) + +#define unorm +#define globallycoherent + +#if FFX_HALF + +#define FFXM_MIN16_F half +#define FFXM_MIN16_F2 half2 +#define FFXM_MIN16_F3 half3 +#define FFXM_MIN16_F4 half4 + +#define FFXM_MIN16_I short +#define FFXM_MIN16_I2 short2 +#define 
FFXM_MIN16_I3 short3 +#define FFXM_MIN16_I4 short4 + +#define FFXM_MIN16_U ushort +#define FFXM_MIN16_U2 ushort2 +#define FFXM_MIN16_U3 ushort3 +#define FFXM_MIN16_U4 ushort4 + +#define FFXM_16BIT_F half +#define FFXM_16BIT_F2 half2 +#define FFXM_16BIT_F3 half3 +#define FFXM_16BIT_F4 half4 + +#define FFXM_16BIT_I short +#define FFXM_16BIT_I2 short2 +#define FFXM_16BIT_I3 short3 +#define FFXM_16BIT_I4 short4 + +#define FFXM_16BIT_U ushort +#define FFXM_16BIT_U2 ushort2 +#define FFXM_16BIT_U3 ushort3 +#define FFXM_16BIT_U4 ushort4 + +#else // FFX_HALF + +#define FFXM_MIN16_F float +#define FFXM_MIN16_F2 float2 +#define FFXM_MIN16_F3 float3 +#define FFXM_MIN16_F4 float4 + +#define FFXM_MIN16_I int +#define FFXM_MIN16_I2 int2 +#define FFXM_MIN16_I3 int3 +#define FFXM_MIN16_I4 int4 + +#define FFXM_MIN16_U uint +#define FFXM_MIN16_U2 uint2 +#define FFXM_MIN16_U3 uint3 +#define FFXM_MIN16_U4 uint4 + +#define FFXM_16BIT_F float +#define FFXM_16BIT_F2 float2 +#define FFXM_16BIT_F3 float3 +#define FFXM_16BIT_F4 float4 + +#define FFXM_16BIT_I int +#define FFXM_16BIT_I2 int2 +#define FFXM_16BIT_I3 int3 +#define FFXM_16BIT_I4 int4 + +#define FFXM_16BIT_U uint +#define FFXM_16BIT_U2 uint2 +#define FFXM_16BIT_U3 uint3 +#define FFXM_16BIT_U4 uint4 + +#endif // FFX_HALF + +#endif // #if defined(SHADER_API_PSSL) + #if defined(FFXM_GLSL) #if FFXM_HALF diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 49dbed7..5e846c4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -41,9 +41,6 @@ #define FSR2_BIND_CB_FSR2 0 
// Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl index 124c73c..34fa02f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -26,9 +26,6 @@ #define FSR2_BIND_CB_REACTIVE 1 // Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl index 3e73211..cba3d0b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl @@ -30,9 +30,6 @@ #define FSR2_BIND_CB_SPD 1 // Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index de93855..60c4dfa 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -33,9 +33,6 @@ #define FSR2_BIND_CB_FSR2 0 // Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl index f8166e1..2da63f2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl @@ -27,9 +27,6 @@ #define FSR2_BIND_CB_FSR2 0 // Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl index 95d6c61..6031679 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl @@ -26,9 +26,6 @@ #define FSR2_BIND_CB_RCAS 1 // Global 
mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index ff65021..57e3e68 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -29,9 +29,6 @@ #define FSR2_BIND_CB_FSR2 0 // Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl index edb6a70..71bdcab 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl @@ -20,9 +20,6 @@ // SOFTWARE. 
// Global mandatory defines -#if !defined(FFXM_HALF) -#define FFXM_HALF 1 -#endif #if !defined(FFXM_GPU) #define FFXM_GPU 1 #endif From 6824f1f652b30a698e8e9c65ffd24b4c41eaeb5c Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 23 Mar 2025 20:23:29 +0100 Subject: [PATCH 54/88] Enable FP16 (FFXM_HALF keyword) usage by default --- .../PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs index 47107cc..bf08c6f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs @@ -17,7 +17,7 @@ namespace UnityEngine.Rendering.PostProcessing public override void CreateContext(PostProcessRenderContext context, Upscaling config) { // Initialize ASR context - Asr.InitializationFlags flags = 0; + Asr.InitializationFlags flags = Asr.InitializationFlags.EnableFP16Usage; if (context.camera.allowHDR) flags |= Asr.InitializationFlags.EnableHighDynamicRange; if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Asr.InitializationFlags.EnableAutoExposure; if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Asr.InitializationFlags.EnableDynamicResolution; From c4b59c7dc3a92aeacd9bc34c7b6e337c1c0a1c2b Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 23 Mar 2025 22:38:28 +0100 Subject: [PATCH 55/88] Bunch of fixes to make ASR Quality somewhat work on PS5, without reactive mask still: - Define macros for various VS/FS binding intrinsics - Use pragmas to set output formats for render targets that aren't normal RGBA - Added alpha channel to color output targets, so that they're 4-channel instead of 3 - Pass FSR2 constant buffer to reactive 
mask shader (may not matter) --- .../Effects/Upscaling/ASR/Runtime/AsrContext.cs | 2 +- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 5 +++-- .../Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 9 +++++++++ .../shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 12 +++++++----- .../shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 2 ++ .../shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 2 ++ .../ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl | 4 ++-- ...ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl | 4 ++++ 8 files changed, 30 insertions(+), 10 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 9bd1cc0..d178599 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -96,7 +96,7 @@ namespace ArmASR _lockPass = new AsrLockPass(_contextDescription, _resources, _upscalerConstantsBuffer); _accumulatePass = new AsrAccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer); _sharpenPass = new AsrSharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer); - _generateReactivePass = new AsrGenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer); + _generateReactivePass = new AsrGenerateReactivePass(_contextDescription, _resources, _upscalerConstantsBuffer, _generateReactiveConstantsBuffer); } public void Destroy() diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 7c7d191..d04be56 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -328,8 +328,8 @@ namespace ArmASR { private readonly ComputeBuffer _generateReactiveConstants; - public AsrGenerateReactivePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer generateReactiveConstants) - : base(contextDescription, resources, null) + public AsrGenerateReactivePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer generateReactiveConstants) + : base(contextDescription, resources, constants) { _generateReactiveConstants = generateReactiveConstants; @@ -347,6 +347,7 @@ namespace ArmASR commandBuffer.SetGlobalResource(AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale); + FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride); BlitFragment(commandBuffer, dispatchParams.OutReactive.RenderTarget); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 895e4cf..a583e19 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -31,3 +31,12 @@ #define InterlockedMin(dest, val) { (dest) = min((dest), (val)); } #define InterlockedMax(dest, val) { (dest) = max((dest), (val)); } #endif + +#if 
defined(SHADER_API_PSSL) +#define SV_VERTEXID S_VERTEX_ID +#define SV_POSITION S_POSITION +#define SV_TARGET0 S_TARGET_OUTPUT0 +#define SV_TARGET1 S_TARGET_OUTPUT1 +#define SV_TARGET2 S_TARGET_OUTPUT2 +#define SV_TARGET3 S_TARGET_OUTPUT3 +#endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 5e846c4..4038324 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -56,6 +56,8 @@ #include "fsr2/ffxm_fsr2_reproject.h" #include "fsr2/ffxm_fsr2_accumulate.h" +#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) + struct AccumulateOutputsFS { #if !FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE @@ -63,14 +65,14 @@ struct AccumulateOutputsFS FfxFloat32x2 fLockStatus : SV_TARGET1; FfxFloat32x4 fLumaHistory : SV_TARGET2; #if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 - FfxFloat32x3 fColor : SV_TARGET3; + FfxFloat32x4 fColor : SV_TARGET3; #endif #else // FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE - FfxFloat32x3 fUpscaledColor : SV_TARGET0; + FfxFloat32x4 fUpscaledColor : SV_TARGET0; FfxFloat32 fTemporalReactive : SV_TARGET1; FfxFloat32x2 fLockStatus : SV_TARGET2; #if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 - FfxFloat32x3 fColor : SV_TARGET3; + FfxFloat32x4 fColor : SV_TARGET3; #endif #endif }; @@ -84,12 +86,12 @@ AccumulateOutputsFS main(float4 SvPosition : SV_POSITION) output.fColorAndWeight = result.fColorAndWeight; output.fLumaHistory = result.fLumaHistory; #else - output.fUpscaledColor = result.fUpscaledColor; + output.fUpscaledColor = FfxFloat32x4(result.fUpscaledColor, 1.0f); output.fTemporalReactive 
= result.fTemporalReactive; #endif output.fLockStatus = result.fLockStatus; #if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 - output.fColor = result.fColor; + output.fColor = FfxFloat32x4(result.fColor, 1.0f); #endif return output; } \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl index 34fa02f..d87abd8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -36,6 +36,8 @@ #include "fsr2/ffxm_fsr2_callbacks_hlsl.h" #include "fsr2/ffxm_fsr2_common.h" +#pragma PSSL_target_output_format(default FMT_32_R) + struct GenReactiveMaskOutputs { FfxFloat32 fReactiveMask : SV_TARGET0; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index 60c4dfa..ed78125 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -45,6 +45,8 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_depth_clip.h" +#pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) + struct DepthClipOutputsFS { FfxFloat32x2 fDilatedReactiveMasks : SV_TARGET0; diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl index 6031679..b5585ec 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl @@ -39,7 +39,7 @@ struct RCASOutputsFS { - FfxFloat32x3 fUpscaledColor : SV_TARGET0; + FfxFloat32x4 fUpscaledColor : SV_TARGET0; }; RCASOutputsFS main(float4 SvPosition : SV_POSITION) @@ -47,6 +47,6 @@ RCASOutputsFS main(float4 SvPosition : SV_POSITION) uint2 uPixelCoord = uint2(SvPosition.xy); RCASOutputs result = RCAS(uPixelCoord); RCASOutputsFS output = (RCASOutputsFS)0; - output.fUpscaledColor = result.fUpscaledColor; + output.fUpscaledColor = FfxFloat32x4(result.fUpscaledColor, 1.0f); return output; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index 57e3e68..b67ff65 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -41,6 +41,10 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" +#pragma PSSL_target_output_format(target 0 FMT_32_R) +#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) +#pragma 
PSSL_target_output_format(target 2 FMT_32_R) + struct ReconstructPrevDepthOutputsFS { FfxFloat32 fDepth : SV_TARGET0; From c0cf3de36435b9da744652fdd78f2057dcc89ed2 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 11:22:26 +0100 Subject: [PATCH 56/88] Temporarily disable FP16 altogether, which makes PSSL shader compilation a lot happier. Need to figure out how to make the FP16 utility function definitions work without the shader compiler complaining about duplicate declarations. --- .../ffxm_fsr2_compute_luminance_pyramid_pass.compute | 2 +- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 8 ++++---- .../Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute index 5d4044b..34e0436 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute @@ -1,6 +1,6 @@ #pragma kernel main -#pragma multi_compile __ FFXM_HALF +//#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader index 5be5aae..ce41e15 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -14,7 +14,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - #pragma multi_compile __ FFXM_HALF + //#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY #include "ffxm_fsr2_common.cginc" @@ -34,7 +34,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - #pragma multi_compile __ FFXM_HALF + //#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS @@ -58,7 +58,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - #pragma multi_compile __ FFXM_HALF + //#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS @@ -83,7 +83,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - #pragma multi_compile __ FFXM_HALF + //#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute index 660b922..c4c47e5 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute @@ -1,6 +1,6 @@ #pragma kernel main -#pragma multi_compile __ FFXM_HALF +//#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH From d088d440f577f209cfa32fe27c40c5c1ae9fbea3 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 12:08:53 +0100 Subject: [PATCH 57/88] Allow live switching between FP16 and FP32 --- .../PostProcessing/Editor/PostProcessLayerEditor.cs | 3 +++ .../PostProcessing/Runtime/Effects/Upscaling.cs | 6 +++++- .../PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs index c89b4bd..d4e5f8d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs @@ -34,6 +34,7 @@ namespace UnityEditor.Rendering.PostProcessing SerializedProperty m_FsrQualityMode; SerializedProperty m_FsrPerformSharpen; SerializedProperty m_FsrSharpness; + SerializedProperty m_FsrEnableFP16; SerializedProperty m_FsrExposureSource; SerializedProperty m_FsrExposureTexture; SerializedProperty m_FsrPreExposure; @@ -94,6 +95,7 @@ namespace UnityEditor.Rendering.PostProcessing m_FsrQualityMode = FindProperty(x => x.upscaling.qualityMode); m_FsrPerformSharpen = FindProperty(x => x.upscaling.performSharpenPass); m_FsrSharpness = FindProperty(x => x.upscaling.sharpness); + 
m_FsrEnableFP16 = FindProperty(x => x.upscaling.enableFP16); m_FsrExposureSource = FindProperty(x => x.upscaling.exposureSource); m_FsrExposureTexture = FindProperty(x => x.upscaling.exposure); m_FsrPreExposure = FindProperty(x => x.upscaling.preExposure); @@ -230,6 +232,7 @@ namespace UnityEditor.Rendering.PostProcessing EditorGUILayout.PropertyField(m_FsrQualityMode); EditorGUILayout.PropertyField(m_FsrPerformSharpen); EditorGUILayout.PropertyField(m_FsrSharpness); + EditorGUILayout.PropertyField(m_FsrEnableFP16); EditorGUILayout.PropertyField(m_FsrExposureSource); if (m_FsrExposureSource.intValue == (int)Upscaling.ExposureSource.Manual) EditorGUILayout.PropertyField(m_FsrExposureTexture); EditorGUILayout.PropertyField(m_FsrPreExposure); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs index 68727e5..1edb208 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs @@ -34,6 +34,8 @@ namespace UnityEngine.Rendering.PostProcessing public bool performSharpenPass = true; [Tooltip("Strength of the sharpening effect.")] [Range(0, 1)] public float sharpness = 0.8f; + + public bool enableFP16 = true; [Tooltip("Choose where to get the exposure value from. 
Use auto-exposure from either the upscaler or Unity, provide a manual exposure texture, or use a default value.")] public ExposureSource exposureSource = ExposureSource.Auto; @@ -112,6 +114,7 @@ namespace UnityEngine.Rendering.PostProcessing private Fsr2.QualityMode _prevQualityMode; private ExposureSource _prevExposureSource; private Vector2Int _prevUpscaleSize; + private bool _prevFP16; private Rect _originalRect; @@ -165,7 +168,7 @@ namespace UnityEngine.Rendering.PostProcessing // Monitor for any resolution changes and recreate the upscaler context if necessary // We can't create an upscaler context without info from the post-processing context, so delay the initial setup until here if (!_initialized || _upscaler == null || _upscaleSize.x != _prevUpscaleSize.x || _upscaleSize.y != _prevUpscaleSize.y || - upscalerType != _prevUpscalerType || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource) + upscalerType != _prevUpscalerType || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource || enableFP16 != _prevFP16) { DestroyUpscaler(); CreateUpscaler(context); @@ -199,6 +202,7 @@ namespace UnityEngine.Rendering.PostProcessing _prevQualityMode = qualityMode; _prevExposureSource = exposureSource; _prevUpscaleSize = _upscaleSize; + _prevFP16 = enableFP16; _callbacks = callbacksFactory(context); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs index bf08c6f..172520f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs @@ -17,7 +17,7 @@ namespace UnityEngine.Rendering.PostProcessing public override void CreateContext(PostProcessRenderContext context, Upscaling config) { // Initialize ASR context - Asr.InitializationFlags 
flags = Asr.InitializationFlags.EnableFP16Usage; + Asr.InitializationFlags flags = config.enableFP16 ? Asr.InitializationFlags.EnableFP16Usage : 0; if (context.camera.allowHDR) flags |= Asr.InitializationFlags.EnableHighDynamicRange; if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Asr.InitializationFlags.EnableAutoExposure; if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Asr.InitializationFlags.EnableDynamicResolution; From 090b9135e8c72563c869515ed15155b26a14a316 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 13:56:39 +0100 Subject: [PATCH 58/88] Reworked auto-exposure to use a double buffered render texture, which allows the smooth exposure transition logic to load a value that's guaranteed to be from the previous frame. Fixes artifacting and flickering issues caused by loading & storing to the same texture. --- .../Effects/Upscaling/ASR/Runtime/AsrContext.cs | 8 ++++---- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 5 +++-- .../Effects/Upscaling/ASR/Runtime/AsrResources.cs | 11 +++++------ .../ffxm_fsr2_compute_luminance_pyramid_pass.hlsl | 1 + .../Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 4 +++- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 9bd1cc0..cdcca6b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -148,7 +148,7 @@ namespace ArmASR // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends if ((_contextDescription.Flags & Asr.InitializationFlags.EnableAutoExposure) != 0) - dispatchParams.Exposure = new 
ResourceView(_resources.AutoExposure); + dispatchParams.Exposure = new ResourceView(_resources.AutoExposure[frameIndex]); else if (!dispatchParams.Exposure.IsValid) dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure); @@ -185,9 +185,9 @@ namespace ArmASR commandBuffer.ClearRenderTarget(false, true, Color.clear); // Auto exposure always used to track luma changes in locking logic - commandBuffer.SetRenderTarget(_resources.AutoExposure); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); - + commandBuffer.SetRenderTarget(_resources.AutoExposure[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, new Color(-1f, 1e8f, 0f, 0f)); + // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); commandBuffer.ClearRenderTarget(false, true, Color.clear); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 7c7d191..4139b64 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -147,11 +147,12 @@ namespace ArmASR protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.Color); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex ^ 1]); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMipLumaChange, 
Resources.SceneLuminance, ShadingChangeMipLevel); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5); - commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoExposure, Resources.AutoExposure); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoExposure, Resources.AutoExposure[frameIndex]); commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbSpd, _spdConstants, 0, _spdConstants.stride); @@ -273,7 +274,7 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLanczosLut, Resources.LanczosLut); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); - commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 1d9fd89..89ab6f8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -36,8 +36,8 @@ namespace ArmASR public Texture2D LanczosLut; public Texture2D MaximumBiasLut; public RenderTexture 
SpdAtomicCounter; - public RenderTexture AutoExposure; public RenderTexture SceneLuminance; + public readonly RenderTexture[] AutoExposure = new RenderTexture[2]; public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2]; public readonly RenderTexture[] LockStatus = new RenderTexture[2]; public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; @@ -90,10 +90,6 @@ namespace ArmASR SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "ASR_SpdAtomicCounter", enableRandomWrite = true }; SpdAtomicCounter.Create(); - // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE - AutoExposure = new RenderTexture(1, 1, 0, rg16Format) { name = "ASR_AutoExposure", enableRandomWrite = true }; - AutoExposure.Create(); - // Resource FSR2_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. 
int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; @@ -101,6 +97,9 @@ namespace ArmASR SceneLuminance = new RenderTexture(w, h, 0, r16Format, mipCount) { name = "ASR_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; SceneLuminance.Create(); + // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(AutoExposure, "ASR_AutoExposure", Vector2Int.one, rg16Format); + // Resources FSR2_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE CreateDoubleBufferedResource(DilatedMotionVectors, "ASR_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat); @@ -194,8 +193,8 @@ namespace ArmASR DestroyResource(InternalUpscaled); DestroyResource(LockStatus); DestroyResource(DilatedMotionVectors); + DestroyResource(AutoExposure); DestroyResource(ref SceneLuminance); - DestroyResource(ref AutoExposure); DestroyResource(ref DefaultReactive); DestroyResource(ref DefaultExposure); DestroyResource(ref MaximumBiasLut); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl index cba3d0b..314e189 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl @@ -20,6 +20,7 @@ // SOFTWARE. 
#define FSR2_BIND_SRV_INPUT_COLOR 0 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 1 #define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 #define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index c277f7d..6159912 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -946,7 +946,9 @@ FfxFloat32x3 LoadOpaqueOnly(FFXM_PARAMETER_IN FFXM_MIN16_I2 iPxPos) FfxFloat32x2 SPD_LoadExposureBuffer() { -#if defined FSR2_BIND_UAV_AUTO_EXPOSURE +#if defined FSR2_BIND_SRV_AUTO_EXPOSURE + return r_auto_exposure[FfxInt32x2(0, 0)].rg; +#elif defined FSR2_BIND_UAV_AUTO_EXPOSURE return rw_auto_exposure[FfxInt32x2(0, 0)].rg; #else return FfxFloat32x2(0.f, 0.f); From 8d85253def913103c019073fdd1aa664977f370a Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 14:34:13 +0100 Subject: [PATCH 59/88] Enabled Native16Bit requirement for Vulkan, which sets a precedent for other graphics APIs to use this as well --- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 895e4cf..2e3c9b2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -3,6 +3,13 @@ #pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible +// Doesn't work for: +// - DX11: forces use of DXC which is not supported +// - XB1: native FP16 seems to be unsupported +#if defined(SHADER_API_VULKAN) +#pragma require Native16Bit +#endif + // If these keywords are set by Unity, redefine them to have a truthy value #ifdef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #undef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE From e4ad05a4143a6ad7ca9fbd8103b5bdbfd4f44310 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 16:40:23 +0100 Subject: [PATCH 60/88] Made PSSL pragmas only get included for PSSL to prevent compiler warnings, and require native 16-bit compilation for PSSL. --- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 2 +- .../ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 2 ++ .../ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 2 ++ .../ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 2 ++ .../shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index b0fa3da..8b8a686 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -6,7 +6,7 @@ // Doesn't work for: // - DX11: forces use of DXC which is not supported // - XB1: native FP16 seems to be 
unsupported -#if defined(SHADER_API_VULKAN) +#if defined(SHADER_API_VULKAN) || defined(SHADER_API_PSSL) #pragma require Native16Bit #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 4038324..a5959f7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -56,7 +56,9 @@ #include "fsr2/ffxm_fsr2_reproject.h" #include "fsr2/ffxm_fsr2_accumulate.h" +#if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) +#endif struct AccumulateOutputsFS { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl index d87abd8..24172a5 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -36,7 +36,9 @@ #include "fsr2/ffxm_fsr2_callbacks_hlsl.h" #include "fsr2/ffxm_fsr2_common.h" +#if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(default FMT_32_R) +#endif struct GenReactiveMaskOutputs { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index ed78125..d9db178 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -45,7 +45,9 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_depth_clip.h" +#if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) +#endif struct DepthClipOutputsFS { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index b67ff65..c07b3c2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -41,9 +41,11 @@ #include "fsr2/ffxm_fsr2_sample.h" #include "fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" +#if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(target 0 FMT_32_R) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 2 FMT_32_R) +#endif struct ReconstructPrevDepthOutputsFS { From 884ca840715346a5a5a1117da316b8c022ca5a9d Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 16:43:32 +0100 Subject: [PATCH 61/88] Moved unorm and globallycoherent definitions to the common cginc, rather than stuffing them somewhere in between all the types --- 
.../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 3 +++ .../Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 8b8a686..b7143ef 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -39,6 +39,7 @@ #define InterlockedMax(dest, val) { (dest) = max((dest), (val)); } #endif +// PSSL uses different semantics and doesn't support certain type qualifiers #if defined(SHADER_API_PSSL) #define SV_VERTEXID S_VERTEX_ID #define SV_POSITION S_POSITION @@ -46,4 +47,6 @@ #define SV_TARGET1 S_TARGET_OUTPUT1 #define SV_TARGET2 S_TARGET_OUTPUT2 #define SV_TARGET3 S_TARGET_OUTPUT3 +#define unorm +#define globallycoherent #endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h index be29d6c..bdef206 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h @@ -472,9 +472,6 @@ typedef FfxUInt32x4 Prefix##_U4; #if defined(SHADER_API_PSSL) -#define unorm -#define globallycoherent - #if FFX_HALF #define FFXM_MIN16_F half From 5f4d4b37ad4540125f228c62bb6d04e65ae13152 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 17:18:30 +0100 Subject: [PATCH 62/88] 
Fixed a big derp in the PSSL type definitions, copy-pasted code still contained FFX_HALF instead of FFXM_HALF, which caused 16-bit types to be compiled as 32-bit, causing a bunch of duplicate function definitions. --- .../ffxm_fsr2_compute_luminance_pyramid_pass.compute | 2 +- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader | 8 ++++---- .../Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute | 2 +- .../Upscaling/ASR/Shaders/shaders/ffxm_common_types.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute index 34e0436..5d4044b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute @@ -1,6 +1,6 @@ #pragma kernel main -//#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader index ce41e15..5be5aae 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader @@ -14,7 +14,7 @@ Shader 
"TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - //#pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY #include "ffxm_fsr2_common.cginc" @@ -34,7 +34,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - //#pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS @@ -58,7 +58,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - //#pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS @@ -83,7 +83,7 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma target 4.5 //#pragma enable_d3d11_debug_symbols - //#pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute index c4c47e5..660b922 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute @@ -1,6 +1,6 @@ #pragma kernel main -//#pragma multi_compile __ FFXM_HALF +#pragma 
multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h index bdef206..8628721 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h @@ -472,7 +472,7 @@ typedef FfxUInt32x4 Prefix##_U4; #if defined(SHADER_API_PSSL) -#if FFX_HALF +#if FFXM_HALF #define FFXM_MIN16_F half #define FFXM_MIN16_F2 half2 @@ -504,7 +504,7 @@ typedef FfxUInt32x4 Prefix##_U4; #define FFXM_16BIT_U3 ushort3 #define FFXM_16BIT_U4 ushort4 -#else // FFX_HALF +#else // FFXM_HALF #define FFXM_MIN16_F float #define FFXM_MIN16_F2 float2 @@ -536,7 +536,7 @@ typedef FfxUInt32x4 Prefix##_U4; #define FFXM_16BIT_U3 uint3 #define FFXM_16BIT_U4 uint4 -#endif // FFX_HALF +#endif // FFXM_HALF #endif // #if defined(SHADER_API_PSSL) From bd18c12408495a2b88df7c05f6b675c6d56c0db9 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 18:40:07 +0100 Subject: [PATCH 63/88] Updated PSSL target output format pragmas with what *should* be the correct values, though the PS5 still disagrees with this --- .../ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 2 ++ .../ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl | 2 +- .../ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index a5959f7..10d9508 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -57,7 +57,9 @@ #include "fsr2/ffxm_fsr2_accumulate.h" #if defined(SHADER_API_PSSL) +#pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) +#pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR) #endif struct AccumulateOutputsFS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl index 24172a5..64b5b29 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl @@ -37,7 +37,7 @@ #include "fsr2/ffxm_fsr2_common.h" #if defined(SHADER_API_PSSL) -#pragma PSSL_target_output_format(default FMT_32_R) +#pragma PSSL_target_output_format(default FMT_FP16_ABGR) #endif struct GenReactiveMaskOutputs diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index d9db178..86f042a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -47,6 +47,7 @@ #if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) +#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) #endif struct DepthClipOutputsFS From 9aee5758b6bfedf6a6bd0c17291ea9da177f572e Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 19:36:25 +0100 Subject: [PATCH 64/88] Made some progress towards making ASR FP16 work on PS5: - Auto-gen reactive mask & Reconstruct previous depth & Depth Clip passes now verified correct and working - Changed dilated reactive masks texture format to R16G16_SFloat, which Agc seems to like better as an output format than R8G8_UNorm - Accumulate pass is the only real (big) problem left --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 89ab6f8..0ad7ef5 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -141,7 +141,7 @@ namespace ArmASR commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); // FSR2_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, 
FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16_SFloat, 1, true); // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1, true); From 2f8a0ca595280cad8082d00b9d32c432a8e55962 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Mon, 24 Mar 2025 19:59:23 +0100 Subject: [PATCH 65/88] Added output target pragmas for random write targets. Not sure if these are necessary but it's something to keep an eye on. --- .../ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 1 + .../shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl | 1 + 2 files changed, 2 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index 10d9508..e3ba114 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -60,6 +60,7 @@ #pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR) +#pragma PSSL_target_output_format(target 4 FMT_32_R) #endif 
struct AccumulateOutputsFS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index c07b3c2..1c27721 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -45,6 +45,7 @@ #pragma PSSL_target_output_format(target 0 FMT_32_R) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 2 FMT_32_R) +#pragma PSSL_target_output_format(target 3 FMT_32_R) #endif struct ReconstructPrevDepthOutputsFS From 90490e88192b53eac4431d845860d942a398106a Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 10:47:06 +0100 Subject: [PATCH 66/88] Temp disabled Native16Bit again, as it does cause compilation issues when building for standalone. Need to make a split between Modern and Legacy here. 
--- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 2e3c9b2..83c4ae2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -7,7 +7,7 @@ // - DX11: forces use of DXC which is not supported // - XB1: native FP16 seems to be unsupported #if defined(SHADER_API_VULKAN) -#pragma require Native16Bit +//#pragma require Native16Bit // *sigh* Unity STILL ignores the #if guard when compiling for standalone #endif // If these keywords are set by Unity, redefine them to have a truthy value From cc74a9cd2fb7af3dd0a3335aee12f9c8f7a09315 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 11:00:30 +0100 Subject: [PATCH 67/88] Use implicit register binding for fragment shader random write targets. Fixes UAVs not binding properly on PS5. 
--- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index 6159912..1963bd8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -379,7 +379,11 @@ SamplerState s_LinearClamp : register(s1); // UAV declarations #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH + #if defined(SHADER_API_PSSL) + RWTexture2D rw_reconstructed_previous_nearest_depth; // Need to use implicit register binding for random write targets on PS4/5 + #else RWTexture2D rw_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS RWTexture2D rw_dilated_motion_vectors : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); From c52a40f706f7498f38ca558a8abee2371b03c0ec Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 12:07:08 +0100 Subject: [PATCH 68/88] Use implicit register binding when using UAVs in fragment shaders. Fixes remaining issues with reconstruct and accumulate passes on PS5 CGGC. Also disable writing to locks UAV at the end of Accumulate pass, as it doesn't actually do anything and it might cause issues with reading & writing to the same texture. 
--- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h | 2 +- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h index 1120019..73404b5 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -372,7 +372,7 @@ AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos) #if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 results.fColor = fHistoryColor; #endif - StoreNewLocks(iPxHrPos, 0); + //StoreNewLocks(iPxHrPos, 0); return results; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index 1963bd8..77b235d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -379,7 +379,7 @@ SamplerState s_LinearClamp : register(s1); // UAV declarations #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - #if defined(SHADER_API_PSSL) + #if defined(SHADER_API_PSSL) && defined(SHADER_STAGE_FRAGMENT) RWTexture2D rw_reconstructed_previous_nearest_depth; // Need to use implicit register binding for random write targets on PS4/5 #else RWTexture2D rw_reconstructed_previous_nearest_depth : 
FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); @@ -401,7 +401,11 @@ SamplerState s_LinearClamp : register(s1); RWTexture2D rw_lock_input_luma : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); #endif #if defined FSR2_BIND_UAV_NEW_LOCKS + #if defined(SHADER_API_PSSL) && defined(SHADER_STAGE_FRAGMENT) + RWTexture2D rw_new_locks; // Need to use implicit register binding for random write targets on PS4/5 + #else RWTexture2D rw_new_locks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); + #endif #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR RWTexture2D rw_prepared_input_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); From 51e5a86112942bc0312912527ad683ce3c3c5f20 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 12:18:34 +0100 Subject: [PATCH 69/88] Reverted dilated reactive masks format back to R8G8_UNorm, as that works correctly now --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 0ad7ef5..89ab6f8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -141,7 +141,7 @@ namespace ArmASR commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); // FSR2_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16_SFloat, 1, true); + 
commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1, true); From 16cf41493a7da3e2f3a8b83a699159803ee105cd Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 13:11:33 +0100 Subject: [PATCH 70/88] Swapped the order of the render targets around for the depth clip pass. Fixes prepared input color being black in NGGC because... I don't know, probably an alignment issue of some sort? PS5 is being really stupid here. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 4 ++-- .../ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl | 6 +++--- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index f8ef944..4ad1628 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -215,8 +215,8 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); - _mrt[0] = AsrShaderIDs.RtDilatedReactiveMasks; // fDilatedReactiveMasks - _mrt[1] = AsrShaderIDs.RtPreparedInputColor; // fTonemapped + _mrt[0] = 
AsrShaderIDs.RtPreparedInputColor; // fTonemapped + _mrt[1] = AsrShaderIDs.RtDilatedReactiveMasks; // fDilatedReactiveMasks FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl index 86f042a..dd80fa0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl @@ -52,8 +52,8 @@ struct DepthClipOutputsFS { - FfxFloat32x2 fDilatedReactiveMasks : SV_TARGET0; - FfxFloat32x4 fTonemapped : SV_TARGET1; + FfxFloat32x4 fTonemapped : SV_TARGET0; + FfxFloat32x2 fDilatedReactiveMasks : SV_TARGET1; }; DepthClipOutputsFS main(float4 SvPosition : SV_POSITION) @@ -61,7 +61,7 @@ DepthClipOutputsFS main(float4 SvPosition : SV_POSITION) uint2 uPixelCoord = uint2(SvPosition.xy); DepthClipOutputs result = DepthClip(uPixelCoord); DepthClipOutputsFS output = (DepthClipOutputsFS)0; - output.fDilatedReactiveMasks = result.fDilatedReactiveMasks; output.fTonemapped = result.fTonemapped; + output.fDilatedReactiveMasks = result.fDilatedReactiveMasks; return output; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h index 2ef4152..0b4a00d 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h @@ -27,8 +27,8 @@ struct DepthClipOutputs { - FfxFloat32x2 fDilatedReactiveMasks; FfxFloat32x4 fTonemapped; + FfxFloat32x2 fDilatedReactiveMasks; }; FFXM_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; From 3912c66b0d2f77926a1ea858c933974c58364b55 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 13:58:41 +0100 Subject: [PATCH 71/88] Bind new locks to the accumulate shader as a regular SRV (read-only), and clear the locks buffer ahead of time instead of at the end of the accumulate shader. This simplifies the shader binding setup, as well as being "more correct" because we're using a temporary RT for the locks buffer, meaning it makes no sense to clear it for the next frame. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs | 3 +++ .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 4 +--- .../ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl | 4 +--- .../Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h | 1 - .../Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h | 4 ++-- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index c180165..0d50ed9 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -198,6 +198,9 @@ namespace ArmASR commandBuffer.SetRenderTarget(AsrShaderIDs.UavReconstructedPrevNearestDepth); commandBuffer.ClearRenderTarget(false, true, depthInverted ? 
Color.clear : Color.white); + commandBuffer.SetRenderTarget(AsrShaderIDs.UavNewLocks); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + // Auto exposure SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 4ad1628..764e02b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -277,9 +277,7 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); - - // UAV binding in fragment shader, index needs to match the register binding in HLSL - commandBuffer.SetRandomWriteTarget(4, AsrShaderIDs.UavNewLocks); + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvNewLocks, AsrShaderIDs.UavNewLocks); if (ContextDescription.Variant == Asr.Variant.Quality) { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl index e3ba114..8c23aab 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl @@ -35,8 +35,7 @@ #define 
FSR2_BIND_SRV_AUTO_EXPOSURE 9 #define FSR2_BIND_SRV_LUMA_HISTORY 10 #define FSR2_BIND_SRV_TEMPORAL_REACTIVE 11 - -#define FSR2_BIND_UAV_NEW_LOCKS 4 +#define FSR2_BIND_SRV_NEW_LOCKS 12 #define FSR2_BIND_CB_FSR2 0 @@ -60,7 +59,6 @@ #pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR) -#pragma PSSL_target_output_format(target 4 FMT_32_R) #endif struct AccumulateOutputsFS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h index 73404b5..35769f0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h @@ -372,7 +372,6 @@ AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos) #if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0 results.fColor = fHistoryColor; #endif - //StoreNewLocks(iPxHrPos, 0); return results; } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h index 752a39a..b6d20b6 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h @@ -331,7 +331,7 @@ void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAM LockState ReprojectHistoryLockStatus(const 
AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) { LockState state = { FFXM_FALSE, FFXM_FALSE }; - const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); + const FfxFloat32 fNewLockIntensity = LoadNewLocks(params.iPxHrPos); state.NewLock = fNewLockIntensity > (127.0f / 255.0f); FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0; @@ -368,7 +368,7 @@ void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAM LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat16x2 fReprojectedLockStatus) { LockState state = { FFXM_FALSE, FFXM_FALSE }; - const FfxFloat16 fNewLockIntensity = FfxFloat16(LoadRwNewLocks(params.iPxHrPos)); + const FfxFloat16 fNewLockIntensity = FfxFloat16(LoadNewLocks(params.iPxHrPos)); state.NewLock = fNewLockIntensity > (127.0f / 255.0f); FfxFloat16 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : FfxFloat16(0); From 3293533bf9d8d9327ccf4f2cce9c347d0c816c3f Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 14:57:04 +0100 Subject: [PATCH 72/88] Minor: print the ASR variant to the log when creating the context, makes for easier readback of the log --- .../PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index 1074a9c..c9a5511 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -44,7 +44,7 @@ namespace ArmASR flags |= InitializationFlags.EnableDebugChecking; #endif - Debug.Log($"Setting up ASR with render size: 
{maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); + Debug.Log($"Setting up ASR {variant} with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); var contextDescription = new ContextDescription { From 89269073f78e7fe631406616b68aeca26f89ea5a Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 15:42:08 +0100 Subject: [PATCH 73/88] Removed random write requirement from all render textures that aren't used as UAV (i.e. are used as fragment render targets). Sort of makes GLES3 work, and might provide an appreciable overall efficiency boost as well. --- .../Effects/Upscaling/ASR/Runtime/AsrResources.cs | 14 +++++++------- .../Runtime/Effects/Upscaling/ASRUpscaler.cs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 89ab6f8..6b20aff 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -98,7 +98,7 @@ namespace ArmASR SceneLuminance.Create(); // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE - CreateDoubleBufferedResource(AutoExposure, "ASR_AutoExposure", Vector2Int.one, rg16Format); + CreateDoubleBufferedResource(AutoExposure, "ASR_AutoExposure", Vector2Int.one, rg16Format, enableRandomWrite: true); // Resources FSR2_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE CreateDoubleBufferedResource(DilatedMotionVectors, "ASR_InternalDilatedVelocity", 
contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat); @@ -135,16 +135,16 @@ namespace ArmASR commandBuffer.GetTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); // FSR2_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1); // FSR2_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1); // FSR2_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1); // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1, true); + commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? 
GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1); // FSR2_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(AsrShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, r8Format, 1, true); @@ -177,11 +177,11 @@ namespace ArmASR commandBuffer.ReleaseTemporaryRT(AsrShaderIDs.UavNewLocks); } - private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format) + private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format, bool enableRandomWrite = false) { for (int i = 0; i < 2; ++i) { - resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true }; + resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = enableRandomWrite }; resource[i].Create(); } } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs index 172520f..e0a8b62 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs @@ -48,7 +48,7 @@ namespace UnityEngine.Rendering.PostProcessing SetupAutoReactiveDescription(context, config); var scaledRenderSize = _genReactiveDescription.RenderSize; - cmd.GetTemporaryRT(AsrShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + cmd.GetTemporaryRT(AsrShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1); _genReactiveDescription.OutReactive = new ResourceView(AsrShaderIDs.UavAutoReactive); 
_asrContext.GenerateReactiveMask(_genReactiveDescription, cmd); _dispatchDescription.Reactive = new ResourceView(AsrShaderIDs.UavAutoReactive); From 5faf6c2a56e9ec8224fc08d2afbf2ab749955176 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 16:51:45 +0100 Subject: [PATCH 74/88] Reworked ASR shaders for modern compilation on DX12, Vulkan, Metal, PS5 and Xbox Series: - Use DXC compiler - Enable native 16-bit - Use wave operations where supported --- .../ASR/Shaders/ffxm_fsr2_common.cginc | 7 ------ ...sr2_compute_luminance_pyramid_pass.compute | 14 ----------- ...hader => ffxm_fsr2_fragment_modern.shader} | 17 ++++++++++++- ... => ffxm_fsr2_fragment_modern.shader.meta} | 0 ....compute => ffxm_fsr2_lock_modern.compute} | 3 +++ ...eta => ffxm_fsr2_lock_modern.compute.meta} | 0 .../ffxm_fsr2_luma_pyramid_modern.compute | 24 +++++++++++++++++++ ...fxm_fsr2_luma_pyramid_modern.compute.meta} | 0 8 files changed, 43 insertions(+), 22 deletions(-) delete mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/{ffxm_fsr2_fs.shader => ffxm_fsr2_fragment_modern.shader} (85%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/{ffxm_fsr2_fs.shader.meta => ffxm_fsr2_fragment_modern.shader.meta} (100%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/{ffxm_fsr2_lock_pass.compute => ffxm_fsr2_lock_modern.compute} (79%) rename Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/{ffxm_fsr2_lock_pass.compute.meta => ffxm_fsr2_lock_modern.compute.meta} (100%) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute rename 
Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/{ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta => ffxm_fsr2_luma_pyramid_modern.compute.meta} (100%) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 5be235b..9acbae8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -3,13 +3,6 @@ #pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible -// Doesn't work for: -// - DX11: forces use of DXC which is not supported -// - XB1: native FP16 seems to be unsupported -#if defined(SHADER_API_VULKAN) || defined(SHADER_API_PSSL) -//#pragma require Native16Bit // *sigh* Unity STILL ignores the #if guard when compiling for standalone -#endif - // If these keywords are set by Unity, redefine them to have a truthy value #ifdef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #undef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute deleted file mode 100644 index 5d4044b..0000000 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute +++ /dev/null @@ -1,14 +0,0 @@ -#pragma kernel main - -#pragma multi_compile __ FFXM_HALF -#pragma 
multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH - -#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY - -#include "ffxm_fsr2_common.cginc" - -#define FFXM_SPD_NO_WAVE_OPERATIONS - -#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader similarity index 85% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader index 5be5aae..9ab1cb9 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader @@ -1,4 +1,4 @@ -Shader "TND/ASR/ffx_fsr2_fs" +Shader "TND/ASR/ffxm_fsr2_fragment_modern" { SubShader { @@ -12,6 +12,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma use_dxc + #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -32,6 +35,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma use_dxc + #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -56,6 +62,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma 
target 4.5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma use_dxc + #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -81,6 +90,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma use_dxc + #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -108,6 +120,9 @@ Shader "TND/ASR/ffx_fsr2_fs" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma use_dxc + #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader.meta similarity index 100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fs.shader.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute similarity index 79% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute index 660b922..e3c6aab 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute @@ -1,4 +1,7 @@ #pragma kernel main +#pragma only_renderers d3d11 vulkan metal ps5 xboxseries +#pragma use_dxc +#pragma require Native16Bit #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute.meta similarity index 100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_pass.compute.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute new file mode 100644 index 0000000..31b26cf --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -0,0 +1,24 @@ +#pragma kernel main +#pragma only_renderers d3d11 vulkan metal ps5 xboxseries +#pragma use_dxc +#pragma require Native16Bit + +#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + +#include 
"ffxm_fsr2_common.cginc" + +// Enable wave operations for the platforms that support it +#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) || defined(SHADER_API_GAMECORE)) && !defined(SHADER_API_MOBILE) +#pragma require WaveBasic // Required for WaveGetLaneIndex +#pragma require WaveBallot // Required for WaveReadLaneAt +#pragma require QuadShuffle // Required for QuadReadAcross +#else +#define FFXM_SPD_NO_WAVE_OPERATIONS +#endif + +#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute.meta similarity index 100% rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_compute_luminance_pyramid_pass.compute.meta rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute.meta From facfd037d2c1a317778cce54ca0289b3708e8081 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 18:38:54 +0100 Subject: [PATCH 75/88] Added Legacy variants of the ASR shaders, without DXC, Native16Bit or wave operations. Separated them into an AsrShaderBundle that can be selected from at run-time based on the current graphics device. 
--- .../PostProcessing/PostProcessResources.asset | 11 +- .../Effects/Upscaling/ASR/Runtime/Asr.cs | 4 +- .../Upscaling/ASR/Runtime/AsrAssets.cs | 44 ++++++- .../Shaders/ffxm_fsr2_fragment_legacy.shader | 122 ++++++++++++++++++ .../ffxm_fsr2_fragment_legacy.shader.meta | 9 ++ .../Shaders/ffxm_fsr2_fragment_modern.shader | 2 + .../ASR/Shaders/ffxm_fsr2_lock_legacy.compute | 12 ++ .../ffxm_fsr2_lock_legacy.compute.meta | 3 + .../ffxm_fsr2_luma_pyramid_legacy.compute | 15 +++ ...ffxm_fsr2_luma_pyramid_legacy.compute.meta | 3 + .../Runtime/PostProcessResources.cs | 2 +- 11 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute create mode 100644 Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset index 36cc0e2..8aa70bb 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset @@ -160,6 +160,11 @@ MonoBehaviour: activate: {fileID: 7200000, guid: 
d7de362950af6fe4e90da7d6e32f9826, type: 3} upscale: {fileID: 7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3} asrUpscalerShaders: - fragmentShader: {fileID: 4800000, guid: 147cc2cffac69ef4eb3ea8addafc9d10, type: 3} - computeLuminancePyramidPass: {fileID: 7200000, guid: 57220d870cb441c8a6df8a9e15a74283, type: 3} - lockPass: {fileID: 7200000, guid: a6e1d5d5372d467790fcf2d089b50ef7, type: 3} + legacyShaders: + fragmentShader: {fileID: 4800000, guid: 42e5314e46109a441a4527349d8df6e4, type: 3} + computeLuminancePyramidPass: {fileID: 7200000, guid: 41d0c3a77d97a904e96ebc2bf18129f6, type: 3} + lockPass: {fileID: 7200000, guid: a09277df48840a84196b3bac299544ea, type: 3} + modernShaders: + fragmentShader: {fileID: 4800000, guid: 147cc2cffac69ef4eb3ea8addafc9d10, type: 3} + computeLuminancePyramidPass: {fileID: 7200000, guid: 57220d870cb441c8a6df8a9e15a74283, type: 3} + lockPass: {fileID: 7200000, guid: a6e1d5d5372d467790fcf2d089b50ef7, type: 3} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index c9a5511..9c414b1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -33,7 +33,7 @@ namespace ArmASR /// /// Creates a new ASR context with standard parameters that are appropriate for the current platform. 
/// - public static AsrContext CreateContext(Variant variant, Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaders shaders, InitializationFlags flags = 0) + public static AsrContext CreateContext(Variant variant, Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaderBundle shaders, InitializationFlags flags = 0) { if (SystemInfo.usesReversedZBuffer) flags |= InitializationFlags.EnableDepthInverted; @@ -52,7 +52,7 @@ namespace ArmASR Variant = variant, DisplaySize = displaySize, MaxRenderSize = maxRenderSize, - Shaders = shaders, + Shaders = shaders.GetShadersForCurrentPlatform(), }; var context = new AsrContext(); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs index b44859c..3a910a0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -19,6 +19,7 @@ // THE SOFTWARE. 
using UnityEngine; +using UnityEngine.Rendering; namespace ArmASR { @@ -29,16 +30,25 @@ namespace ArmASR [CreateAssetMenu(fileName = "ASR Assets", menuName = "ARM/ASR Assets", order = 1102)] public class AsrAssets : ScriptableObject { - public AsrShaders shaders; + public AsrShaderBundle shaderBundle; #if UNITY_EDITOR private void Reset() { - shaders = new AsrShaders + shaderBundle = new AsrShaderBundle { - fragmentShader = FindFragmentShader("ffxm_fsr2_fs"), - computeLuminancePyramidPass = FindComputeShader("ffxm_fsr2_compute_luminance_pyramid_pass"), - lockPass = FindComputeShader("ffxm_fsr2_lock_pass"), + legacyShaders = new AsrShaders + { + fragmentShader = FindFragmentShader("ffxm_fsr2_fragment_legacy"), + computeLuminancePyramidPass = FindComputeShader("ffxm_fsr2_luma_pyramid_legacy"), + lockPass = FindComputeShader("ffxm_fsr2_lock_legacy"), + }, + modernShaders = new AsrShaders + { + fragmentShader = FindFragmentShader("ffxm_fsr2_fragment_modern"), + computeLuminancePyramidPass = FindComputeShader("ffxm_fsr2_luma_pyramid_modern"), + lockPass = FindComputeShader("ffxm_fsr2_lock_modern"), + }, }; } @@ -63,6 +73,30 @@ namespace ArmASR } #endif } + + [System.Serializable] + public class AsrShaderBundle + { + public AsrShaders legacyShaders; + + public AsrShaders modernShaders; + + public AsrShaders GetShadersForCurrentPlatform() + { + switch (SystemInfo.graphicsDeviceType) + { + case GraphicsDeviceType.Direct3D12: + case GraphicsDeviceType.Vulkan: + case GraphicsDeviceType.Metal: + case GraphicsDeviceType.PlayStation5: + case GraphicsDeviceType.PlayStation5NGGC: + case GraphicsDeviceType.GameCoreXboxSeries: + return modernShaders; + default: + return legacyShaders; + } + } + } /// /// All the compute shaders used by ASR. 
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader new file mode 100644 index 0000000..236b507 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader @@ -0,0 +1,122 @@ +Shader "TND/ASR/ffxm_fsr2_fragment_legacy" +{ + SubShader + { + Cull Off ZWrite Off ZTest Always + + Pass // 0 + { + Name "Auto-Generate Reactive Mask" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 1 + { + Name "Reconstruct Previous Depth" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 2 + { + Name "Depth Clip" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED + #pragma multi_compile __ 
FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 3 + { + Name "Accumulate" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ FFXM_HALF + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED + #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE + #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE + #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT + #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS + #pragma multi_compile __ FFXM_FSR2_OPTION_APPLY_SHARPENING + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl" + + ENDHLSL + } + + Pass // 4 + { + Name "Sharpen" + + HLSLPROGRAM + #pragma vertex VertMain + #pragma fragment main + #pragma target 4.5 + //#pragma enable_d3d11_debug_symbols + + #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + + #include "ffxm_fsr2_common.cginc" + #include "shaders/ffxm_fsr2_vs.hlsl" + #include "shaders/ffxm_fsr2_rcas_pass_fs.hlsl" + + ENDHLSL + } + } +} diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta new file mode 100644 index 
0000000..4fc6554 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 42e5314e46109a441a4527349d8df6e4 +ShaderImporter: + externalObjects: {} + defaultTextures: [] + nonModifiableTextures: [] + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader index 9ab1cb9..ec7f5a8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader @@ -134,4 +134,6 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" ENDHLSL } } + + Fallback "TND/ASR/ffxm_fsr2_fragment_legacy" } diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute new file mode 100644 index 0000000..660b922 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute @@ -0,0 +1,12 @@ +#pragma kernel main + +#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + +#include "ffxm_fsr2_common.cginc" + +#include "shaders/ffxm_fsr2_lock_pass.hlsl" diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta new file mode 100644 index 0000000..2bfe598 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: a09277df48840a84196b3bac299544ea +timeCreated: 1742417134 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute new file mode 100644 index 0000000..0aeae34 --- /dev/null +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute @@ -0,0 +1,15 @@ +#pragma kernel main + +#pragma multi_compile __ FFXM_HALF +#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY + +#include "ffxm_fsr2_common.cginc" + +// Disable wave operations altogether +#define FFXM_SPD_NO_WAVE_OPERATIONS + +#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta new file mode 100644 index 0000000..e102c9d --- /dev/null +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 41d0c3a77d97a904e96ebc2bf18129f6 +timeCreated: 1742416757 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs index 867095e..ef3feaa 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs @@ -297,7 +297,7 @@ namespace UnityEngine.Rendering.PostProcessing /// /// Shaders used by the Arm Accuracy Super Resolution (ASR) Upscaler. /// - public AsrShaders asrUpscalerShaders; + public AsrShaderBundle asrUpscalerShaders; #if UNITY_EDITOR /// From 4c6a7b7309f7e885c9b195151ec9b70f4a4a7f4f Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 18:39:29 +0100 Subject: [PATCH 76/88] Removed Native16Bit from the two compute shaders because Xbox Series does not like it. Seems fine for the fragment shader though, which is where it matters most. 
--- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute | 1 - .../Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute | 1 - 2 files changed, 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute index e3c6aab..f15eb80 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute @@ -1,7 +1,6 @@ #pragma kernel main #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc -#pragma require Native16Bit #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute index 31b26cf..bcac985 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -1,7 +1,6 @@ #pragma kernel main #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc -#pragma require Native16Bit #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS From 4a8f9455e600138fa6bb7d1b3dcba291c2372ab8 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 18:45:27 +0100 Subject: [PATCH 77/88] Removed the old license text from the C# source 
files --- .../Effects/Upscaling/ASR/Runtime/Asr.cs | 22 +------------------ .../Upscaling/ASR/Runtime/AsrAssets.cs | 22 +------------------ .../Upscaling/ASR/Runtime/AsrCallbacks.cs | 22 +------------------ .../Upscaling/ASR/Runtime/AsrContext.cs | 22 +------------------ .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 22 +------------------ .../Upscaling/ASR/Runtime/AsrResources.cs | 22 +------------------ .../Upscaling/ASR/Runtime/AsrShaderIDs.cs | 22 +------------------ .../Upscaling/ASR/Runtime/ResourceView.cs | 20 ----------------- 8 files changed, 7 insertions(+), 167 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs index 9c414b1..2095317 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; +using System; using System.Runtime.InteropServices; using UnityEngine; using UnityEngine.Rendering; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs index 3a910a0..1d65342 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using UnityEngine; +using UnityEngine; using UnityEngine.Rendering; namespace ArmASR diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs index 50fe7a6..505767b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using UnityEngine; +using UnityEngine; namespace ArmASR { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 0d50ed9..ed69a3a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -using System; +using System; using System.Runtime.InteropServices; using UnityEngine; using UnityEngine.Rendering; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 764e02b..365f47a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -using System; +using System; using System.Diagnostics; using System.Runtime.CompilerServices; using UnityEngine; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs index 6b20aff..31e6634 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -using System; +using System; using UnityEngine; using UnityEngine.Experimental.Rendering; using UnityEngine.Rendering; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs index 64af0eb..9b1cd1f 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs @@ -1,24 +1,4 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -using UnityEngine; +using UnityEngine; namespace ArmASR { diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs index fab2113..f0d6be1 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs @@ -1,23 +1,3 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- using UnityEngine.Rendering; namespace ArmASR From cfae0072e9e71d6c8dd5889bc2ed4bce487db0e5 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 20:16:57 +0100 Subject: [PATCH 78/88] Force enable FP16 shader code path, and redefine FFXM_HALF with a truthy value --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs | 1 + .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index ed69a3a..1562dd6 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -52,6 +52,7 @@ namespace ArmASR public void Create(in Asr.ContextDescription contextDescription) { _contextDescription = contextDescription; + _contextDescription.Flags |= Asr.InitializationFlags.EnableFP16Usage; // Always force FP16 code path _upscalerConstantsBuffer = CreateConstantBuffer(); _spdConstantsBuffer = CreateConstantBuffer(); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 9acbae8..74adb10 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -4,6 +4,10 @@ #pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible // If these keywords are set by Unity, redefine them to have a truthy value +#if 
defined(FFXM_HALF) +#undef FFXM_HALF +#define FFXM_HALF (1) +#endif #ifdef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #undef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE #define FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE 1 From 760003744f7e782fccd7923ba811a1db3dceb4b1 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 25 Mar 2025 20:55:17 +0100 Subject: [PATCH 79/88] Added missing bits of temporal reactive implementation in the FP32 code path, and only bind the relevant inputs to the Accumulate shader based on the ASR mode. Fixes Balanced and Performance modes on Xbox One, which for some reason still seems to be running the FP32 code path, no matter what the FFXM_HALF keyword is set to. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 6 ++++-- .../ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 365f47a..505d251 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -255,18 +255,20 @@ namespace ArmASR commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex]); - commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); - commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvNewLocks, AsrShaderIDs.UavNewLocks); if 
(ContextDescription.Variant == Asr.Variant.Quality) { + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fColorAndWeight _mrt[1] = Resources.LockStatus[frameIndex]; // fLockStatus _mrt[2] = Resources.LumaHistory[frameIndex]; // fLumaHistory } else { + commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]); + _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fUpscaledColor _mrt[1] = Resources.InternalReactive[frameIndex]; // fTemporalReactive _mrt[2] = Resources.LockStatus[frameIndex]; // fLockStatus diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h index b6d20b6..b8b4c24 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h @@ -324,7 +324,11 @@ void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAM #endif //Compute temporal reactivity info +#if FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE + fTemporalReactiveFactor = ffxSaturate(abs(SampleTemporalReactive(params.fReprojectedHrUv))); +#else fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); +#endif bInMotionLastFrame = (fHistory.w < 0.0f); } From 3ae6628a34925ce142ef4c1fe648a8f5b9294929 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 26 Mar 2025 15:46:10 +0100 Subject: [PATCH 80/88] Added a few missing "in" keywords --- .../Effects/Upscaling/ASR/Runtime/AsrPass.cs | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 505d251..55d49d3 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -29,7 +29,7 @@ namespace ArmASR private CustomSampler _sampler; - protected AsrPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) + protected AsrPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) { ContextDescription = contextDescription; Resources = resources; @@ -45,14 +45,14 @@ namespace ArmASR } } - public void ScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX = 0, int dispatchY = 0) + public void ScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX = 0, int dispatchY = 0) { BeginSample(commandBuffer); DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); EndSample(commandBuffer); } - protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); + protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); protected void InitComputeShader(string passName, ComputeShader shader) { @@ -116,7 +116,7 @@ namespace ArmASR { private readonly ComputeBuffer _spdConstants; - public AsrComputeLuminancePyramidPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer spdConstants) + public 
AsrComputeLuminancePyramidPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer spdConstants) : base(contextDescription, resources, constants) { _spdConstants = spdConstants; @@ -124,7 +124,7 @@ namespace ArmASR InitComputeShader("Compute Luminance Pyramid", contextDescription.Shaders.computeLuminancePyramidPass); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.Color); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex ^ 1]); @@ -145,13 +145,13 @@ namespace ArmASR { private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[3]; - public AsrReconstructPreviousDepthPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) + public AsrReconstructPreviousDepthPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { InitFragmentShader("Reconstruct & Dilate", contextDescription.Shaders.fragmentShader, 1); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth); @@ -175,13 +175,13 
@@ namespace ArmASR { private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2]; - public AsrDepthClipPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) + public AsrDepthClipPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { InitFragmentShader("Depth Clip", contextDescription.Shaders.fragmentShader, 2); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color); commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth); @@ -205,13 +205,13 @@ namespace ArmASR internal class AsrLockPass : AsrPass { - public AsrLockPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) + public AsrLockPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { InitComputeShader("Create Locks", contextDescription.Shaders.lockPass); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLockInputLuma, AsrShaderIDs.UavLockInputLuma); @@ -228,13 +228,13 @@ namespace ArmASR { private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[4]; - public 
AsrAccumulatePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) + public AsrAccumulatePass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { InitFragmentShader("Reproject & Accumulate", contextDescription.Shaders.fragmentShader, 3); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { if ((ContextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) { @@ -286,7 +286,7 @@ namespace ArmASR { private readonly ComputeBuffer _rcasConstants; - public AsrSharpenPass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants) + public AsrSharpenPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants) : base(contextDescription, resources, constants) { _rcasConstants = rcasConstants; @@ -294,7 +294,7 @@ namespace ArmASR InitFragmentShader("RCAS Sharpening", contextDescription.Shaders.fragmentShader, 4); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure); commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); @@ -309,7 +309,7 @@ namespace ArmASR { private readonly ComputeBuffer 
_generateReactiveConstants; - public AsrGenerateReactivePass(Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer generateReactiveConstants) + public AsrGenerateReactivePass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer generateReactiveConstants) : base(contextDescription, resources, constants) { _generateReactiveConstants = generateReactiveConstants; @@ -317,7 +317,7 @@ namespace ArmASR InitFragmentShader("Auto-Generate Reactive Mask", contextDescription.Shaders.fragmentShader, 0); } - protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { } From 133343b28529c5215b56ee64bad6466e0f731bf7 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 26 Mar 2025 16:22:14 +0100 Subject: [PATCH 81/88] Added texture array support for inputs and outputs --- .../ASR/Shaders/ffxm_fsr2_common.cginc | 35 +++++++++++ .../shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h | 62 ++++++++++++------- 2 files changed, 73 insertions(+), 24 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc index 74adb10..5db4b7a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc @@ -47,3 +47,38 @@ #define unorm #define globallycoherent #endif + +// Workaround for HDRP using texture arrays for its camera buffers on some platforms +// The 
below defines are adapted from: Packages/com.unity.render-pipelines.core/ShaderLibrary/TextureXR.hlsl +#if ((defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12)) && !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_GAMECORE)) || defined(SHADER_API_PSSL) || defined(SHADER_API_VULKAN) + #define UNITY_TEXTURE2D_X_ARRAY_SUPPORTED +#endif + +// Control if TEXTURE2D_X macros will expand to texture arrays +#if defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) && defined(UNITY_FFXM_TEXTURE2D_X_ARRAY) + #define USE_TEXTURE2D_X_AS_ARRAY +#endif + +// Early defines for single-pass instancing +#if defined(STEREO_INSTANCING_ON) && defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) + #define UNITY_STEREO_INSTANCING_ENABLED +#endif + +// Helper macros to handle XR single-pass with Texture2DArray +#if defined(USE_TEXTURE2D_X_AS_ARRAY) + + // Only single-pass stereo instancing used array indexing + #if defined(UNITY_STEREO_INSTANCING_ENABLED) + static uint unity_StereoEyeIndex; + #define SLICE_ARRAY_INDEX unity_StereoEyeIndex + #else + #define SLICE_ARRAY_INDEX 0 + #endif + + // Declare and sample camera buffers as texture arrays + #define UNITY_FSR_TEX2D(type) Texture2DArray + #define UNITY_FSR_RWTEX2D(type) RWTexture2DArray + #define UNITY_FSR_POS(pxPos) FfxUInt32x3(pxPos, SLICE_ARRAY_INDEX) + #define UNITY_FSR_UV(uv) FfxFloat32x3(uv, SLICE_ARRAY_INDEX) + +#endif diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h index 77b235d..01f1a59 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h @@ -299,21 +299,35 @@ FfxUInt32x2 
SPD_RenderSize() } #endif // #if defined(FSR2_BIND_CB_SPD) +// Declare and sample camera buffers as regular textures, unless overridden +#if !defined(UNITY_FSR_TEX2D) +#define UNITY_FSR_TEX2D(type) Texture2D +#endif +#if !defined(UNITY_FSR_RWTEX2D) +#define UNITY_FSR_RWTEX2D(type) RWTexture2D +#endif +#if !defined(UNITY_FSR_POS) +#define UNITY_FSR_POS(pxPos) (pxPos) +#endif +#if !defined(UNITY_FSR_UV) +#define UNITY_FSR_UV(uv) (uv) +#endif + SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); // SRVs #if defined FSR2_BIND_SRV_INPUT_COLOR - Texture2D r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); #endif #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY - Texture2D r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); #endif #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS - Texture2D r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); #endif #if defined FSR2_BIND_SRV_INPUT_DEPTH - Texture2D r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); + UNITY_FSR_TEX2D(FfxFloat32) r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); #endif #if defined FSR2_BIND_SRV_INPUT_EXPOSURE Texture2D r_input_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); @@ -322,10 +336,10 @@ SamplerState s_LinearClamp : register(s1); Texture2D r_auto_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); #endif #if defined FSR2_BIND_SRV_REACTIVE_MASK - Texture2D r_reactive_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + UNITY_FSR_TEX2D(FfxFloat32) r_reactive_mask : 
FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); #endif #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK - Texture2D r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + UNITY_FSR_TEX2D(FfxFloat32) r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); #endif #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH Texture2D r_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); @@ -414,7 +428,7 @@ SamplerState s_LinearClamp : register(s1); RWTexture2D rw_luma_history : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - RWTexture2D rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + UNITY_FSR_RWTEX2D(FfxFloat32x4) rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE globallycoherent RWTexture2D rw_img_mip_shading_change : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); @@ -456,7 +470,7 @@ FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) #if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) { - return r_input_depth[iPxPos]; + return r_input_depth[UNITY_FSR_POS(iPxPos)]; } /* dd00 (-1,1) *------* dd10 (0,-1) @@ -470,7 +484,7 @@ void GatherInputDepthRQuad(FfxFloat32x2 fUV, FFXM_PARAMETER_INOUT FfxFloat32 dd01, FFXM_PARAMETER_INOUT FfxFloat32 dd11) { - FfxFloat32x4 rrrr = r_input_depth.GatherRed(s_PointClamp, fUV); + FfxFloat32x4 rrrr = r_input_depth.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV)); dd01 = FfxFloat32(rrrr.x); dd11 = FfxFloat32(rrrr.y); dd10 = FfxFloat32(rrrr.z); @@ -481,14 +495,14 @@ void GatherInputDepthRQuad(FfxFloat32x2 fUV, #if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) { - return r_input_depth.SampleLevel(s_LinearClamp, 
fUV, 0).x; + return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x; } #endif #if defined(FSR2_BIND_SRV_REACTIVE_MASK) FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) { - return r_reactive_mask[iPxPos]; + return r_reactive_mask[UNITY_FSR_POS(iPxPos)]; } /* col00 (-1,1) *------* col10 (0,-1) @@ -502,7 +516,7 @@ void GatherReactiveRQuad(FfxFloat32x2 fUV, FFXM_PARAMETER_INOUT FFXM_MIN16_F col01, FFXM_PARAMETER_INOUT FFXM_MIN16_F col11) { - FFXM_MIN16_F4 rrrr = r_reactive_mask.GatherRed(s_PointClamp, fUV); + FFXM_MIN16_F4 rrrr = r_reactive_mask.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV)); col01 = FFXM_MIN16_F(rrrr.x); col11 = FFXM_MIN16_F(rrrr.y); col10 = FFXM_MIN16_F(rrrr.z); @@ -513,7 +527,7 @@ void GatherReactiveRQuad(FfxFloat32x2 fUV, #if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { - return r_transparency_and_composition_mask[iPxPos]; + return r_transparency_and_composition_mask[UNITY_FSR_POS(iPxPos)]; } /* col00 (-1,1) *------* col10 (0,-1) @@ -527,7 +541,7 @@ void GatherTransparencyAndCompositionMaskRQuad(FfxFloat32x2 fUV, FFXM_PARAMETER_INOUT FFXM_MIN16_F col01, FFXM_PARAMETER_INOUT FFXM_MIN16_F col11) { - FFXM_MIN16_F4 rrrr = r_transparency_and_composition_mask.GatherRed(s_PointClamp, fUV); + FFXM_MIN16_F4 rrrr = r_transparency_and_composition_mask.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV)); col01 = FFXM_MIN16_F(rrrr.x); col11 = FFXM_MIN16_F(rrrr.y); col10 = FFXM_MIN16_F(rrrr.z); @@ -538,7 +552,7 @@ void GatherTransparencyAndCompositionMaskRQuad(FfxFloat32x2 fUV, #if defined(FSR2_BIND_SRV_INPUT_COLOR) FFXM_MIN16_F3 LoadInputColor(FfxUInt32x2 iPxPos) { - return r_input_color_jittered[iPxPos].rgb; + return r_input_color_jittered[UNITY_FSR_POS(iPxPos)].rgb; } /* col00 (-1,1) *------* col10 (0,-1) @@ -552,9 +566,9 @@ void GatherInputColorRGBQuad(FfxFloat32x2 fUV, FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col01, FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col11) { - FFXM_MIN16_F4 rrrr = 
r_input_color_jittered.GatherRed(s_PointClamp, fUV); - FFXM_MIN16_F4 gggg = r_input_color_jittered.GatherGreen(s_PointClamp, fUV); - FFXM_MIN16_F4 bbbb = r_input_color_jittered.GatherBlue(s_PointClamp, fUV); + FFXM_MIN16_F4 rrrr = r_input_color_jittered.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV)); + FFXM_MIN16_F4 gggg = r_input_color_jittered.GatherGreen(s_PointClamp, UNITY_FSR_UV(fUV)); + FFXM_MIN16_F4 bbbb = r_input_color_jittered.GatherBlue(s_PointClamp, UNITY_FSR_UV(fUV)); col01 = FFXM_MIN16_F3(rrrr.x, gggg.x, bbbb.x); col11 = FFXM_MIN16_F3(rrrr.y, gggg.y, bbbb.y); col10 = FFXM_MIN16_F3(rrrr.z, gggg.z, bbbb.z); @@ -565,7 +579,7 @@ void GatherInputColorRGBQuad(FfxFloat32x2 fUV, #if defined(FSR2_BIND_SRV_INPUT_COLOR) FFXM_MIN16_F3 SampleInputColor(FfxFloat32x2 fUV) { - return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; + return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).rgb; } #endif @@ -603,7 +617,7 @@ void GatherPreparedInputColorRGBQuad(FfxFloat32x2 fUV, #if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) FFXM_MIN16_F2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) { - FFXM_MIN16_F2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; + FFXM_MIN16_F2 fSrcMotionVector = r_input_motion_vectors[UNITY_FSR_POS(iPxDilatedMotionVectorPos)].xy; FFXM_MIN16_F2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); @@ -625,8 +639,8 @@ void GatherInputMotionVectorRGQuad(FfxFloat32x2 fUV, FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col01, FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col11) { - FFXM_MIN16_F4 rrrr = r_input_motion_vectors.GatherRed(s_PointClamp, fUV); - FFXM_MIN16_F4 gggg = r_input_motion_vectors.GatherGreen(s_PointClamp, fUV); + FFXM_MIN16_F4 rrrr = r_input_motion_vectors.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV)); + FFXM_MIN16_F4 gggg = r_input_motion_vectors.GatherGreen(s_PointClamp, UNITY_FSR_UV(fUV)); col01 = FFXM_MIN16_F2(rrrr.x, gggg.x) * MotionVectorScale(); col11 = FFXM_MIN16_F2(rrrr.y, 
gggg.y) * MotionVectorScale(); col10 = FFXM_MIN16_F2(rrrr.z, gggg.z) * MotionVectorScale(); @@ -711,7 +725,7 @@ void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeigh #if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) { - rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); + rw_upscaled_output[UNITY_FSR_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); } #endif @@ -948,7 +962,7 @@ FFXM_MIN16_F2 LoadDilatedReactiveMasks(FFXM_PARAMETER_IN FfxUInt32x2 iPxPos) #if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) FfxFloat32x3 LoadOpaqueOnly(FFXM_PARAMETER_IN FFXM_MIN16_I2 iPxPos) { - return r_input_opaque_only[iPxPos].xyz; + return r_input_opaque_only[UNITY_FSR_POS(iPxPos)].xyz; } #endif From 1222d236f5338978ff144d014fce34cd772233a6 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 26 Mar 2025 22:25:51 +0100 Subject: [PATCH 82/88] Fixed dynamic resolution scaling: - Two instances of the same bug in ARM's code, where RenderSize was used instead of MaxRenderSize to calculate UVs - One instance of me previously fixing a bug in Isonzo that isn't a bug in the latest PPV2 code, oops --- .../Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h | 2 +- .../Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h | 4 ++-- .../PostProcessing/Runtime/PostProcessLayer.cs | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h index 1efc9e1..6725573 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h @@ -65,7 +65,7 @@ FfxBoolean 
ComputeThinFeatureConfidence(FfxInt32x2 pos) FFXM_MIN16_F lumaSamples [9]; FFXM_MIN16_F fTmpDummy = FFXM_MIN16_F(0.0f); - const FfxFloat32x2 fInputLumaSize = FfxFloat32x2(RenderSize()); + const FfxFloat32x2 fInputLumaSize = FfxFloat32x2(MaxRenderSize()); const FfxFloat32x2 fPxBaseUv = FfxFloat32x2(pos) / fInputLumaSize; const FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fInputLumaSize; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h index d41127d..d1829bf 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h @@ -84,8 +84,8 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 - FfxFloat32x2 iSrcInputUv = FfxFloat32x2(fSrcOutputPos) / FfxFloat32x2(RenderSize()); - FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(RenderSize()); + FfxFloat32x2 iSrcInputUv = FfxFloat32x2(fSrcOutputPos) / FfxFloat32x2(MaxRenderSize()); + FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(MaxRenderSize()); FFXM_MIN16_F4 fColorAndWeight = FFXM_MIN16_F4(0.0f, 0.0f, 0.0f, 0.0f); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs index a51151f..0c32a88 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs @@ -742,8 +742,7 @@ namespace UnityEngine.Rendering.PostProcessing // Create a copy of the opaque-only color buffer for auto-reactive mask generation if (context.IsSuperResolutionActive() && (upscaling.autoGenerateReactiveMask || upscaling.autoGenerateTransparencyAndComposition)) { - Vector2Int scaledRenderSize = upscaling.GetScaledRenderSize(context.camera); - m_opaqueOnly = context.GetScreenSpaceTemporaryRT(colorFormat: sourceFormat, widthOverride: scaledRenderSize.x, heightOverride: scaledRenderSize.y); + m_opaqueOnly = context.GetScreenSpaceTemporaryRT(colorFormat: sourceFormat); m_LegacyCmdBufferOpaque.BuiltinBlit(cameraTarget, m_opaqueOnly); } From d7483c31c44d66e012a30b07e92ff080bd290a31 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 26 Mar 2025 22:29:11 +0100 Subject: [PATCH 83/88] Added some commented-out debug pragmas to the compute shaders --- .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute | 1 + .../Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute | 1 + .../Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute | 1 + .../Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute | 1 + 4 files changed, 4 insertions(+) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute index 660b922..d7d43d4 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute @@ -1,4 +1,5 @@ #pragma kernel main +//#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ 
FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute index f15eb80..50138c7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute @@ -1,6 +1,7 @@ #pragma kernel main #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc +//#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute index 0aeae34..04c03ac 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute @@ -1,4 +1,5 @@ #pragma kernel main +//#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute index bcac985..a9d80c2 100644 --- 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -1,6 +1,7 @@ #pragma kernel main #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc +//#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS From 99f28af11d13bd4cfbfe6a0c8eda167e26c5f7e1 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Wed, 26 Mar 2025 23:59:42 +0100 Subject: [PATCH 84/88] Removed Xbox Series from the modern shaders, as it doesn't like Native16Bit at all and just falls back to legacy, and restored Native16Bit to all the modern shaders. Xbox Series will still use DXC and wave operations. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs | 1 - .../ASR/Shaders/ffxm_fsr2_fragment_modern.shader | 10 +++++----- .../ASR/Shaders/ffxm_fsr2_lock_modern.compute | 3 ++- .../ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute | 8 +++++++- .../ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute | 5 +++-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs index 1d65342..7bdd2b7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -70,7 +70,6 @@ namespace ArmASR case GraphicsDeviceType.Metal: case GraphicsDeviceType.PlayStation5: case GraphicsDeviceType.PlayStation5NGGC: - case GraphicsDeviceType.GameCoreXboxSeries: return modernShaders; default: return legacyShaders; diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader index ec7f5a8..d922996 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader @@ -12,7 +12,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -35,7 +35,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -62,7 +62,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -90,7 +90,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + #pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -120,7 +120,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 xboxseries + 
#pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute index 50138c7..8b1aca0 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute @@ -1,6 +1,7 @@ #pragma kernel main -#pragma only_renderers d3d11 vulkan metal ps5 xboxseries +#pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc +#pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute index 04c03ac..d760c72 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute @@ -10,7 +10,13 @@ #include "ffxm_fsr2_common.cginc" -// Disable wave operations altogether +// Enable wave operations for the platforms that support it +#if defined(SHADER_API_GAMECORE_XBOXSERIES) +#pragma require WaveBasic // Required for WaveGetLaneIndex +#pragma require WaveBallot // Required for WaveReadLaneAt +#pragma require QuadShuffle // Required for QuadReadAcross +#else #define FFXM_SPD_NO_WAVE_OPERATIONS +#endif #include 
"shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute index a9d80c2..7848883 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -1,6 +1,7 @@ #pragma kernel main -#pragma only_renderers d3d11 vulkan metal ps5 xboxseries +#pragma only_renderers d3d11 vulkan metal ps5 #pragma use_dxc +#pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -13,7 +14,7 @@ #include "ffxm_fsr2_common.cginc" // Enable wave operations for the platforms that support it -#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) || defined(SHADER_API_GAMECORE)) && !defined(SHADER_API_MOBILE) +#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL)) && !defined(SHADER_API_MOBILE) #pragma require WaveBasic // Required for WaveGetLaneIndex #pragma require WaveBallot // Required for WaveReadLaneAt #pragma require QuadShuffle // Required for QuadReadAcross From fdf2125cfee0b1e514f6fe1f62f2b2ea5a69e1b8 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Thu, 27 Mar 2025 20:27:00 +0100 Subject: [PATCH 85/88] Revert "Removed Xbox Series from the modern shaders, as it doesn't like Native16Bit at all and just falls back to legacy, and restored Native16Bit to all the modern shaders. Xbox Series will still use DXC and wave operations." This reverts commit 99f28af11d13bd4cfbfe6a0c8eda167e26c5f7e1. 
--- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs | 1 + .../ASR/Shaders/ffxm_fsr2_fragment_modern.shader | 10 +++++----- .../ASR/Shaders/ffxm_fsr2_lock_modern.compute | 3 +-- .../ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute | 8 +------- .../ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute | 5 ++--- 5 files changed, 10 insertions(+), 17 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs index 7bdd2b7..1d65342 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs @@ -70,6 +70,7 @@ namespace ArmASR case GraphicsDeviceType.Metal: case GraphicsDeviceType.PlayStation5: case GraphicsDeviceType.PlayStation5NGGC: + case GraphicsDeviceType.GameCoreXboxSeries: return modernShaders; default: return legacyShaders; diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader index d922996..ec7f5a8 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader @@ -12,7 +12,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -35,7 +35,7 @@ Shader 
"TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -62,7 +62,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -90,7 +90,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols @@ -120,7 +120,7 @@ Shader "TND/ASR/ffxm_fsr2_fragment_modern" #pragma vertex VertMain #pragma fragment main #pragma target 4.5 - #pragma only_renderers d3d11 vulkan metal ps5 + #pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc #pragma require Native16Bit //#pragma enable_d3d11_debug_symbols diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute index 8b1aca0..50138c7 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute @@ -1,7 +1,6 @@ #pragma kernel main -#pragma only_renderers d3d11 vulkan metal ps5 +#pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc -#pragma require Native16Bit //#pragma 
enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute index d760c72..04c03ac 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute @@ -10,13 +10,7 @@ #include "ffxm_fsr2_common.cginc" -// Enable wave operations for the platforms that support it -#if defined(SHADER_API_GAMECORE_XBOXSERIES) -#pragma require WaveBasic // Required for WaveGetLaneIndex -#pragma require WaveBallot // Required for WaveReadLaneAt -#pragma require QuadShuffle // Required for QuadReadAcross -#else +// Disable wave operations altogether #define FFXM_SPD_NO_WAVE_OPERATIONS -#endif #include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute index 7848883..a9d80c2 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -1,7 +1,6 @@ #pragma kernel main -#pragma only_renderers d3d11 vulkan metal ps5 +#pragma only_renderers d3d11 vulkan metal ps5 xboxseries #pragma use_dxc -#pragma require Native16Bit //#pragma enable_d3d11_debug_symbols #pragma multi_compile __ FFXM_HALF @@ -14,7 +13,7 @@ 
#include "ffxm_fsr2_common.cginc" // Enable wave operations for the platforms that support it -#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL)) && !defined(SHADER_API_MOBILE) +#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) || defined(SHADER_API_GAMECORE)) && !defined(SHADER_API_MOBILE) #pragma require WaveBasic // Required for WaveGetLaneIndex #pragma require WaveBallot // Required for WaveReadLaneAt #pragma require QuadShuffle // Required for QuadReadAcross From 1dc62167508ee36a7b669f86ffaf4e21beecba54 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Thu, 27 Mar 2025 20:38:20 +0100 Subject: [PATCH 86/88] Simplified platform wave operation support check --- .../Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute index a9d80c2..5e1e43a 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute @@ -13,7 +13,7 @@ #include "ffxm_fsr2_common.cginc" // Enable wave operations for the platforms that support it -#if (defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) || defined(SHADER_API_GAMECORE)) && !defined(SHADER_API_MOBILE) +#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(SHADER_API_MOBILE) #pragma require WaveBasic // Required for WaveGetLaneIndex #pragma require WaveBallot // Required for WaveReadLaneAt #pragma require QuadShuffle // Required for QuadReadAcross From 
f8483414f270ea04ba853e3c604979122a6cbdd8 Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Sun, 30 Mar 2025 20:15:49 +0200 Subject: [PATCH 87/88] Fixed exposure instability for a few seconds after initialization on iOS Metal, and cleared out a shader compiler warning at run-time. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs | 2 +- .../Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs index 1562dd6..e436142 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs @@ -167,7 +167,7 @@ namespace ArmASR // Auto exposure always used to track luma changes in locking logic commandBuffer.SetRenderTarget(_resources.AutoExposure[frameIndex ^ 1]); - commandBuffer.ClearRenderTarget(false, true, new Color(-1f, 1e8f, 0f, 0f)); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h index eb14e4b..eb12bce 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h +++ 
b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h @@ -19,7 +19,7 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -FFXM_GROUPSHARED FfxUInt32 spdCounter; +FFXM_GROUPSHARED FfxUInt32 spdCounter = 0u; void SpdIncreaseAtomicCounter(FfxUInt32 slice) { From dad922c3380f6249431eb2ddb18027bd26e8fa5c Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 1 Apr 2025 13:48:57 +0200 Subject: [PATCH 88/88] Swapped Luma and MotionVector outputs on the Reconstruct & Dilate pass, which fixes another apparent alignment issue on PS5 NGGC, causing the motion vectors to break occasionally when switching scaling ratios. --- .../Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs | 4 ++-- .../ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs index 55d49d3..b5abd87 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs @@ -162,8 +162,8 @@ namespace ArmASR commandBuffer.SetRandomWriteTarget(3, AsrShaderIDs.UavReconstructedPrevNearestDepth); _mrt[0] = AsrShaderIDs.RtDilatedDepth; // fDepth - _mrt[1] = Resources.DilatedMotionVectors[frameIndex]; // fMotionVector - _mrt[2] = AsrShaderIDs.RtLockInputLuma; // fLuma + _mrt[1] = AsrShaderIDs.RtLockInputLuma; // fLuma + _mrt[2] = Resources.DilatedMotionVectors[frameIndex]; // fMotionVector FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride); BlitFragment(commandBuffer, _mrt); diff --git 
a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl index 1c27721..457665b 100644 --- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl +++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl @@ -43,16 +43,16 @@ #if defined(SHADER_API_PSSL) #pragma PSSL_target_output_format(target 0 FMT_32_R) -#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR) -#pragma PSSL_target_output_format(target 2 FMT_32_R) +#pragma PSSL_target_output_format(target 1 FMT_32_R) +#pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR) #pragma PSSL_target_output_format(target 3 FMT_32_R) #endif struct ReconstructPrevDepthOutputsFS { FfxFloat32 fDepth : SV_TARGET0; - FfxFloat32x2 fMotionVector : SV_TARGET1; - FfxFloat32 fLuma : SV_TARGET2; + FfxFloat32 fLuma : SV_TARGET1; + FfxFloat32x2 fMotionVector : SV_TARGET2; }; @@ -62,7 +62,7 @@ ReconstructPrevDepthOutputsFS main(float4 SvPosition : SV_POSITION) ReconstructPrevDepthOutputs result = ReconstructAndDilate(uPixelCoord); ReconstructPrevDepthOutputsFS output = (ReconstructPrevDepthOutputsFS)0; output.fDepth = result.fDepth; - output.fMotionVector = result.fMotionVector; output.fLuma = result.fLuma; + output.fMotionVector = result.fMotionVector; return output; }