Working SGSR2 3-pass:

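- Added a `padding1` field to the C# parameter struct and to each shader's `cbuffer Params` to fix a buffer data misalignment issue
- Simplified the translation of GLSL globalInvocationID to HLSL by taking `SV_DispatchThreadID` directly as the compute shader input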
1 year ago · 7a259062d8
5 changed files with 17 additions and 15 deletions
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs
@ -16,7 +16,7 @@ namespace UnityEngine.Rendering.PostProcessing
 	        [InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2,
 	        [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3,
 	        //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR,
-	        //[InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2,
+	        [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2,
 	        [InspectorName("PlayStation Spectral Super Resolution (PSSR)")] PSSR,
        }

@ -180,6 +180,7 @@ namespace UnityEngine.Rendering.PostProcessing
 				{
 					UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(),
 					UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(),
+					UpscalerType.SGSR2 when SGSR2Upscaler.IsSupported => new SGSR2Upscaler(),
 					_ => new FSR2Upscaler(),	// Fallback for when the selected upscaler is not supported on the current hardware
 				};

--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs
@ -13,6 +13,7 @@ public class SGSR2
        public Vector2 renderSizeRcp;
        public Vector2 displaySizeRcp;
        public Vector2 jitterOffset;
+        public Vector2 padding1;
        public Matrix4x4 clipToPrevClip;
        public float preExposure;
        public float cameraFovAngleHor;
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl
@ -36,6 +36,7 @@ cbuffer Params : register(b0)
    float2               ViewportSizeInverse;
    float2               displaySizeRcp;
    float2               jitterOffset;
+    float2               padding1;
    float4               clipToPrevClip[4];
    float                preExposure;
    float                cameraFovAngleHor;
@ -49,7 +50,7 @@ SamplerState s_PointClamp   : register(s0);
 SamplerState s_LinearClamp  : register(s1);

 [numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
+void CS(uint3 globalInvocationID : SV_DispatchThreadID)
 {
    int2 sampleOffset[4] = {
        int2(-1, -1),
@ -58,14 +59,14 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
        int2(+0, +0)
    };

-    uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId;
+    uint2 InputPos = globalInvocationID.xy;

-    float2 ViewportUV = (float2(InputPos) + 0.5f) * ViewportSizeInverse;
+    float2 ViewportUV = (float2(globalInvocationID.xy) + 0.5f) * ViewportSizeInverse;
    float2 gatherCoord = ViewportUV + 0.5f * ViewportSizeInverse;
    uint luma_reference32 = YCoCgColor.GatherRed(s_PointClamp, gatherCoord).w;
    float luma_reference = DecodeColorY(luma_reference32);

-	float4 mda = MotionDepthAlphaBuffer[InputPos].xyzw; //motion depth alpha
+	float4 mda = MotionDepthAlphaBuffer[globalInvocationID.xy].xyzw; //motion depth alpha
 	float depth = mda.z;
 	float alphamask = mda.w;
 	float2 motion = mda.xy;
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl
@ -28,6 +28,7 @@ cbuffer Params : register(b0)
    float2               ViewportSizeInverse;
    float2               displaySizeRcp;
    float2               jitterOffset;
+    float2               padding1;
    float4               clipToPrevClip[4];
    float                preExposure;
    float                cameraFovAngleHor;
@ -41,12 +42,11 @@ SamplerState s_PointClamp   : register(s0);
 SamplerState s_LinearClamp  : register(s1);

 [numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
+void CS(uint3 globalInvocationID : SV_DispatchThreadID)
 {
-    uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId;
-
-    float2 gatherCoord = float2(InputPos) * ViewportSizeInverse;
+    float2 gatherCoord = float2(globalInvocationID.xy) * ViewportSizeInverse;
    float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse;
+    uint2 InputPos = globalInvocationID.xy;

    //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
    //FindNearestDepth
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl
@ -40,6 +40,7 @@ cbuffer Params : register(b0)
    float2               renderSizeRcp;
    float2               displaySizeRcp;
    float2               jitterOffset;
+    float2               padding1;
    float4               clipToPrevClip[4];
    float                preExposure;
    float                cameraFovAngleHor;
@ -53,10 +54,8 @@ SamplerState s_PointClamp   : register(s0);
 SamplerState s_LinearClamp  : register(s1);

 [numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
+void CS(uint3 globalInvocationID : SV_DispatchThreadID)
 {
-    uint2 InvocationID = GroupId * uint2(8, 8) + GroupThreadId;
-    
    float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99);  //Biasmax_viewportXScale
    float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0));
    float f2 = preExposure;            //1.0;   //preExposure
@ -64,7 +63,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
    float2 HistoryInfoViewportSize = float2(displaySize);
    float2 InputJitter = jitterOffset;
    float2 InputInfoViewportSize = float2(renderSize);
-    float2 Hruv = (float2(InvocationID) + 0.5f) * HistoryInfoViewportSizeInverse;
+    float2 Hruv = (float2(globalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse;
    float2 Jitteruv;
    Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0);
    Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0);
@ -297,7 +296,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
    float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0);
    Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha);

-    HistoryOutput[InvocationID.xy] = float4(Upsampledcw.xyz, Wfactor);
+    HistoryOutput[globalInvocationID.xy] = float4(Upsampledcw.xyz, Wfactor);

    ////ycocg to grb
    float x_z = Upsampledcw.x - Upsampledcw.z;
@ -311,5 +310,5 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
    float scale = preExposure /  ((1.0f + 1.0f / 65504.0f) - compMax);   //(1.0f + 1.0f / 65504.0f) = 1.000015e+00

    Upsampledcw.xyz = Upsampledcw.xyz * scale;
-    SceneColorOutput[InvocationID.xy] = Upsampledcw;
+    SceneColorOutput[globalInvocationID.xy] = Upsampledcw;
 }