diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs
index c89b4bd..d4e5f8d 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Editor/PostProcessLayerEditor.cs
@@ -34,6 +34,7 @@ namespace UnityEditor.Rendering.PostProcessing
SerializedProperty m_FsrQualityMode;
SerializedProperty m_FsrPerformSharpen;
SerializedProperty m_FsrSharpness;
+ SerializedProperty m_FsrEnableFP16;
SerializedProperty m_FsrExposureSource;
SerializedProperty m_FsrExposureTexture;
SerializedProperty m_FsrPreExposure;
@@ -94,6 +95,7 @@ namespace UnityEditor.Rendering.PostProcessing
m_FsrQualityMode = FindProperty(x => x.upscaling.qualityMode);
m_FsrPerformSharpen = FindProperty(x => x.upscaling.performSharpenPass);
m_FsrSharpness = FindProperty(x => x.upscaling.sharpness);
+ m_FsrEnableFP16 = FindProperty(x => x.upscaling.enableFP16);
m_FsrExposureSource = FindProperty(x => x.upscaling.exposureSource);
m_FsrExposureTexture = FindProperty(x => x.upscaling.exposure);
m_FsrPreExposure = FindProperty(x => x.upscaling.preExposure);
@@ -230,6 +232,7 @@ namespace UnityEditor.Rendering.PostProcessing
EditorGUILayout.PropertyField(m_FsrQualityMode);
EditorGUILayout.PropertyField(m_FsrPerformSharpen);
EditorGUILayout.PropertyField(m_FsrSharpness);
+ EditorGUILayout.PropertyField(m_FsrEnableFP16);
EditorGUILayout.PropertyField(m_FsrExposureSource);
if (m_FsrExposureSource.intValue == (int)Upscaling.ExposureSource.Manual) EditorGUILayout.PropertyField(m_FsrExposureTexture);
EditorGUILayout.PropertyField(m_FsrPreExposure);
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset
index cddc96c..8ed7c12 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/PostProcessResources.asset
@@ -114,6 +114,8 @@ MonoBehaviour:
scalableAO: {fileID: 4800000, guid: d7640629310e79646af0f46eb55ae466, type: 3}
multiScaleAO: {fileID: 4800000, guid: 67f9497810829eb4791ec19e95781e51, type: 3}
screenSpaceReflections: {fileID: 4800000, guid: f997a3dc9254c44459323cced085150c, type: 3}
+ sgsr2Upscaler:
+ twoPassFragment: {fileID: 4800000, guid: 59bc1035dd975f64d8141148a7088d0a, type: 3}
computeShaders:
autoExposure: {fileID: 7200000, guid: 34845e0ca016b7448842e965db5890a5, type: 3}
exposureHistogram: {fileID: 7200000, guid: 8c2fcbdf9bc58664f89917f7b9d79501, type: 3}
@@ -151,6 +153,19 @@ MonoBehaviour:
tcrAutoGenPass: {fileID: 7200000, guid: 75cdc6ef23f08ed498d4da511923fcea, type: 3}
debugViewPass: {fileID: 7200000, guid: cb24a71d54164c54eb5e86839acd48c5, type: 3}
sgsr2Upscaler:
- convert: {fileID: 7200000, guid: a41757aacd8b70e42a4001d514bfbe53, type: 3}
- activate: {fileID: 7200000, guid: d7de362950af6fe4e90da7d6e32f9826, type: 3}
- upscale: {fileID: 7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3}
+ twoPassCompute:
+ convert: {fileID: 7200000, guid: 073ee927fbee25841a31cf364834071c, type: 3}
+ upscale: {fileID: 7200000, guid: d7bacd7d04c6521499bef936d93921cc, type: 3}
+ threePassCompute:
+ convert: {fileID: 7200000, guid: a41757aacd8b70e42a4001d514bfbe53, type: 3}
+ activate: {fileID: 7200000, guid: d7de362950af6fe4e90da7d6e32f9826, type: 3}
+ upscale: {fileID: 7200000, guid: 5d28d29787492b74aa736a21f70572c7, type: 3}
+ asrUpscalerShaders:
+ legacyShaders:
+ fragmentShader: {fileID: 4800000, guid: 42e5314e46109a441a4527349d8df6e4, type: 3}
+ computeLuminancePyramidPass: {fileID: 7200000, guid: 41d0c3a77d97a904e96ebc2bf18129f6, type: 3}
+ lockPass: {fileID: 7200000, guid: a09277df48840a84196b3bac299544ea, type: 3}
+ modernShaders:
+ fragmentShader: {fileID: 4800000, guid: 147cc2cffac69ef4eb3ea8addafc9d10, type: 3}
+ computeLuminancePyramidPass: {fileID: 7200000, guid: 57220d870cb441c8a6df8a9e15a74283, type: 3}
+ lockPass: {fileID: 7200000, guid: a6e1d5d5372d467790fcf2d089b50ef7, type: 3}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs
index c9dedfb..98773b3 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
using System.Runtime.InteropServices;
using FidelityFX.FSR2;
using UnityEngine.Experimental.Rendering;
@@ -15,8 +15,12 @@ namespace UnityEngine.Rendering.PostProcessing
{
[InspectorName("FidelityFX Super Resolution 2.2 (FSR2)")] FSR2,
[InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3,
- //[InspectorName("Arm Accuracy Super Resolution (ASR)")] ASR,
- //[InspectorName("Snapdragon Game Super Resolution 2 (SGSR2)")] SGSR2,
+ [InspectorName("Arm Accuracy Super Resolution (ASR) Quality")] ASR_Quality,
+ [InspectorName("Arm Accuracy Super Resolution (ASR) Balanced")] ASR_Balanced,
+ [InspectorName("Arm Accuracy Super Resolution (ASR) Performance")] ASR_Performance,
+ [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Fragment")] SGSR2_2PassFS,
+ [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 2-Pass Compute")] SGSR2_2PassCS,
+ [InspectorName("Snapdragon Game Super Resolution 2 (SGSR2) 3-Pass Compute")] SGSR2_3PassCS,
[InspectorName("PlayStation Spectral Super Resolution (PSSR)")] PSSR,
}
@@ -30,6 +34,8 @@ namespace UnityEngine.Rendering.PostProcessing
public bool performSharpenPass = true;
[Tooltip("Strength of the sharpening effect.")]
[Range(0, 1)] public float sharpness = 0.8f;
+
+ public bool enableFP16 = true;
[Tooltip("Choose where to get the exposure value from. Use auto-exposure from either the upscaler or Unity, provide a manual exposure texture, or use a default value.")]
public ExposureSource exposureSource = ExposureSource.Auto;
@@ -108,6 +114,7 @@ namespace UnityEngine.Rendering.PostProcessing
private Fsr2.QualityMode _prevQualityMode;
private ExposureSource _prevExposureSource;
private Vector2Int _prevUpscaleSize;
+ private bool _prevFP16;
private Rect _originalRect;
@@ -161,7 +168,7 @@ namespace UnityEngine.Rendering.PostProcessing
// Monitor for any resolution changes and recreate the upscaler context if necessary
// We can't create an upscaler context without info from the post-processing context, so delay the initial setup until here
if (!_initialized || _upscaler == null || _upscaleSize.x != _prevUpscaleSize.x || _upscaleSize.y != _prevUpscaleSize.y ||
- upscalerType != _prevUpscalerType || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource)
+ upscalerType != _prevUpscalerType || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource || enableFP16 != _prevFP16)
{
DestroyUpscaler();
CreateUpscaler(context);
@@ -180,8 +187,13 @@ namespace UnityEngine.Rendering.PostProcessing
{
UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(),
UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(),
- //UpscalerType.SGSR2 when SGSR2Upscaler.IsSupported => new SGSR2Upscaler(),
UpscalerType.PSSR when PSSRUpscaler.IsSupported => new PSSRUpscaler(),
+ UpscalerType.ASR_Quality when ASRUpscaler.IsSupported => new ASRUpscaler_Quality(),
+ UpscalerType.ASR_Balanced when ASRUpscaler.IsSupported => new ASRUpscaler_Balanced(),
+ UpscalerType.ASR_Performance when ASRUpscaler.IsSupported => new ASRUpscaler_Performance(),
+ UpscalerType.SGSR2_2PassFS => new SGSR2Upscaler_2PassFS(),
+ UpscalerType.SGSR2_2PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_2PassCS(),
+ UpscalerType.SGSR2_3PassCS when SGSR2Upscaler.IsSupported => new SGSR2Upscaler_3PassCS(),
_ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware
};
@@ -191,6 +203,7 @@ namespace UnityEngine.Rendering.PostProcessing
_prevQualityMode = qualityMode;
_prevExposureSource = exposureSource;
_prevUpscaleSize = _upscaleSize;
+ _prevFP16 = enableFP16;
_callbacks = callbacksFactory(context);
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta
new file mode 100644
index 0000000..1c71416
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: fbb474d5e9430814eb7b83620c3d4189
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta
new file mode 100644
index 0000000..3e15b23
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 6bbfbdd9fd482bd4ea5e998953ae9972
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs
new file mode 100644
index 0000000..2095317
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs
@@ -0,0 +1,321 @@
+using System;
+using System.Runtime.InteropServices;
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// A collection of helper functions and data structures required by the ASR process.
+ /// </summary>
+ public static class Asr
+ {
+ /// <summary>
+ /// Creates a new ASR context with standard parameters that are appropriate for the current platform.
+ /// </summary>
+ /// <remarks>EnableDepthInverted is always overwritten from SystemInfo.usesReversedZBuffer, whatever the caller passed in flags.</remarks>
+ public static AsrContext CreateContext(Variant variant, Vector2Int displaySize, Vector2Int maxRenderSize, AsrShaderBundle shaders, InitializationFlags flags = 0)
+ {
+     // The depth convention is a platform property, so it replaces the caller's choice.
+     flags = SystemInfo.usesReversedZBuffer
+         ? flags | InitializationFlags.EnableDepthInverted
+         : flags & ~InitializationFlags.EnableDepthInverted;
+
+#if UNITY_EDITOR || DEVELOPMENT_BUILD
+     flags |= InitializationFlags.EnableDebugChecking;
+#endif
+
+     Debug.Log($"Setting up ASR {variant} with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}");
+
+     var description = new ContextDescription
+     {
+         Flags = flags,
+         Variant = variant,
+         DisplaySize = displaySize,
+         MaxRenderSize = maxRenderSize,
+         Shaders = shaders.GetShadersForCurrentPlatform(),
+     };
+     var asrContext = new AsrContext();
+     asrContext.Create(description);
+     return asrContext;
+ }
+
+ /// <summary>
+ /// Maps a quality mode to its upscale ratio, i.e. the factor by which each display dimension
+ /// is divided to obtain the render dimension.
+ /// </summary>
+ /// <param name="qualityMode">The quality preset to look up.</param>
+ /// <returns>The ratio for the given mode; 1.0 for values that are not a defined mode.</returns>
+ public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode)
+ {
+     return qualityMode switch
+     {
+         QualityMode.NativeAA => 1.0f,
+         QualityMode.UltraQuality => 1.2f,
+         QualityMode.Quality => 1.5f,
+         QualityMode.Balanced => 1.7f,
+         QualityMode.Performance => 2.0f,
+         QualityMode.UltraPerformance => 3.0f,
+         // Undefined values are treated like NativeAA rather than throwing.
+         _ => 1.0f,
+     };
+ }
+
+ /// <summary>Derives the render resolution by dividing each display dimension by the quality mode's upscale ratio and rounding with Mathf.RoundToInt.</summary>
+ public static void GetRenderResolutionFromQualityMode(out int renderWidth, out int renderHeight, int displayWidth, int displayHeight, QualityMode qualityMode)
+ {
+     float upscaleRatio = GetUpscaleRatioFromQualityMode(qualityMode);
+     // Both axes share one ratio, so the aspect ratio is kept up to rounding.
+     renderWidth = Mathf.RoundToInt(displayWidth / upscaleRatio);
+     renderHeight = Mathf.RoundToInt(displayHeight / upscaleRatio);
+ }
+
+ /// <summary>
+ /// Returns log2(renderWidth / displayWidth) - 1, the mipmap bias offset for the given render and display widths.
+ /// </summary>
+ public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) => Mathf.Log((float)renderWidth / displayWidth, 2.0f) - 1.0f;
+
+ /// <summary>Returns the jitter sequence length: 8 times the squared display-to-render width ratio, truncated to an integer.</summary>
+ public static int GetJitterPhaseCount(int renderWidth, int displayWidth)
+ {
+     const float PhasesAtNativeResolution = 8.0f;
+     return (int)(PhasesAtNativeResolution * Mathf.Pow((float)displayWidth / renderWidth, 2.0f));
+ }
+
+ public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount)
+ {
+ outX = Halton((index % phaseCount) + 1, 2) - 0.5f; // base-2 Halton sample, shifted by -0.5; index wraps every phaseCount frames
+ outY = Halton((index % phaseCount) + 1, 3) - 0.5f; // base-3 Halton sample for the same wrapped, 1-based index
+ }
+
+ /// <summary>
+ /// Computes the Halton low-discrepancy sequence value for the given index and base.
+ /// </summary>
+ /// <returns>The sum of the base-<paramref name="base"/> digits of the index, each weighted by base^-(position + 1); 0 when the index is not positive.</returns>
+ private static float Halton(int index, int @base)
+ {
+     float digitWeight = 1.0f, result = 0.0f;
+     for (int remaining = index; remaining > 0; remaining /= @base) // integer division is exact; a float round-trip loses precision above 2^24
+     {
+         digitWeight /= @base;
+         result += digitWeight * (remaining % @base);
+     }
+     return result;
+ }
+
+ /// <summary>
+ /// Evaluates sinc(value) * sinc(value / 2) with sinc(x) = sin(pi * x) / (pi * x); returns 1 when |value| is below Mathf.Epsilon.
+ /// </summary>
+ public static float Lanczos2(float value) => Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value));
+
+#if !UNITY_2021_1_OR_NEWER
+ /// <summary>
+ /// Forwards to CommandBuffer.SetComputeBufferData; only compiled when UNITY_2021_1_OR_NEWER is not defined.
+ /// </summary>
+ internal static void SetBufferData(this CommandBuffer commandBuffer, ComputeBuffer computeBuffer, Array data) => commandBuffer.SetComputeBufferData(computeBuffer, data);
+#endif
+
+ /// <summary>
+ /// Binds a texture mip to a compute kernel, clamping the requested level to the last mip level the texture has.
+ /// </summary>
+ internal static void SetComputeTextureMipParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int kernelIndex, int nameID, Texture texture, int mipLevel)
+ {
+     int lastMip = texture.mipmapCount - 1;
+     commandBuffer.SetComputeTextureParam(computeShader, kernelIndex, nameID, texture, mipLevel > lastMip ? lastMip : mipLevel);
+ }
+
+ /// <summary>
+ /// Binds a resource view to a compute kernel, passing on its render target, mip level and sub-element.
+ /// </summary>
+ internal static void SetComputeResourceParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int kernelIndex, int nameID, in ResourceView resource) => commandBuffer.SetComputeTextureParam(computeShader, kernelIndex, nameID, resource.RenderTarget, resource.MipLevel, resource.SubElement);
+
+ /// <summary>
+ /// Binds a constant buffer to a compute shader starting at offset 0 with a size equal to the buffer's stride.
+ /// </summary>
+ internal static void SetComputeConstantBufferParam(this CommandBuffer commandBuffer, ComputeShader computeShader, int nameID, ComputeBuffer buffer) => commandBuffer.SetComputeConstantBufferParam(computeShader, nameID, buffer, 0, buffer.stride);
+
+ /// <summary>
+ /// Sets a global shader texture from the view's render target and sub-element; the view's mip level is not used here.
+ /// </summary>
+ internal static void SetGlobalResource(this CommandBuffer commandBuffer, int nameID, in ResourceView resource) => commandBuffer.SetGlobalTexture(nameID, resource.RenderTarget, resource.SubElement);
+
+ /// <summary>Destroys a Unity object; in the editor the destruction is immediate unless the game is playing and not paused. Null is ignored.</summary>
+ internal static void DestroyObject(UnityEngine.Object obj)
+ {
+     if (obj == null) return;
+#if UNITY_EDITOR
+     // Deferred Destroy is only used while the game is actually running; otherwise destroy right away.
+     if (!Application.isPlaying || UnityEditor.EditorApplication.isPaused)
+     {
+         UnityEngine.Object.DestroyImmediate(obj);
+         return;
+     }
+#endif
+     UnityEngine.Object.Destroy(obj);
+ }
+
+ public enum Variant // ASR preset handed to CreateContext; trades image quality against performance
+ {
+ Quality, // Maintains the same image quality as the original FSR2.
+ Balanced, // Gives a significant improvement in both bandwidth savings and performance uplift while maintaining close image quality to the 'quality' preset.
+ Performance, // A more aggressive preset that will give you the highest performance with some quality sacrifices.
+ }
+
+ public enum QualityMode // ratios below are the values returned by GetUpscaleRatioFromQualityMode
+ {
+ NativeAA = 0, // upscale ratio 1.0: render size equals display size
+ UltraQuality = 1, // upscale ratio 1.2
+ Quality = 2, // upscale ratio 1.5
+ Balanced = 3, // upscale ratio 1.7
+ Performance = 4, // upscale ratio 2.0
+ UltraPerformance = 5, // upscale ratio 3.0
+ }
+
+ [Flags]
+ public enum InitializationFlags
+ {
+ EnableHighDynamicRange = 1 << 0,
+ EnableDisplayResolutionMotionVectors = 1 << 1,
+ EnableMotionVectorsJitterCancellation = 1 << 2,
+ EnableDepthInverted = 1 << 3, // set or cleared by CreateContext from SystemInfo.usesReversedZBuffer
+ EnableDepthInfinite = 1 << 4,
+ EnableAutoExposure = 1 << 5,
+ EnableDynamicResolution = 1 << 6,
+ EnableFP16Usage = 1 << 7,
+ EnableDebugChecking = 1 << 8, // added by CreateContext in editor and development builds
+ }
+
+ /// <summary>
+ /// A structure encapsulating the parameters required to initialize Arm Accuracy Super Resolution (ASR) upscaling.
+ /// </summary>
+ public struct ContextDescription
+ {
+ public InitializationFlags Flags;
+ public Variant Variant;
+ public Vector2Int MaxRenderSize;
+ public Vector2Int DisplaySize;
+ public AsrShaders Shaders;
+ }
+
+ /// <summary>
+ /// A structure encapsulating the parameters for dispatching the various passes of Arm Accuracy Super Resolution (ASR).
+ /// </summary>
+ public struct DispatchDescription
+ {
+ public ResourceView Color;
+ public ResourceView Depth;
+ public ResourceView MotionVectors;
+ public ResourceView Exposure; // optional
+ public ResourceView Reactive; // optional
+ public ResourceView TransparencyAndComposition; // optional
+ public ResourceView Output;
+ public Vector2 JitterOffset;
+ public Vector2 MotionVectorScale;
+ public Vector2Int RenderSize;
+ public Vector2Int InputResourceSize;
+ public bool EnableSharpening;
+ public float Sharpness;
+ public float FrameTimeDelta; // in seconds
+ public float PreExposure;
+ public bool Reset;
+ public float CameraNear;
+ public float CameraFar;
+ public float CameraFovAngleVertical;
+ public float ViewSpaceToMetersFactor;
+ public bool UseTextureArrays; // Enable texture array bindings, primarily used for HDRP and XR
+ }
+
+ /// <summary>
+ /// A structure encapsulating the parameters for automatic generation of a reactive mask.
+ /// </summary>
+ public struct GenerateReactiveDescription
+ {
+ public ResourceView ColorOpaqueOnly;
+ public ResourceView ColorPreUpscale;
+ public ResourceView OutReactive;
+ public Vector2Int RenderSize;
+ public float Scale;
+ public float CutoffThreshold;
+ public float BinaryValue;
+ public GenerateReactiveFlags Flags;
+
+ public static readonly GenerateReactiveDescription Default = new GenerateReactiveDescription // resource views and RenderSize stay at their defaults and must be set by the caller
+ {
+ Scale = 0.5f,
+ CutoffThreshold = 0.2f,
+ BinaryValue = 0.9f,
+ Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax,
+ };
+ }
+
+ [Flags]
+ public enum GenerateReactiveFlags // bit flags stored in GenerateReactiveDescription.Flags
+ {
+ ApplyTonemap = 1 << 0,
+ ApplyInverseTonemap = 1 << 1,
+ ApplyThreshold = 1 << 2,
+ UseComponentsMax = 1 << 3,
+ }
+
+ [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: fields are laid out in declaration order, so do not reorder them casually
+ internal struct UpscalerConstants
+ {
+ public Vector2Int renderSize;
+ public Vector2Int maxRenderSize;
+ public Vector2Int displaySize;
+ public Vector2Int inputColorResourceDimensions;
+ public Vector2Int lumaMipDimensions;
+ public int lumaMipLevelToUse;
+ public int frameIndex;
+
+ public Vector4 deviceToViewDepth;
+ public Vector2 jitterOffset;
+ public Vector2 motionVectorScale;
+ public Vector2 downscaleFactor;
+ public Vector2 motionVectorJitterCancellation;
+ public float preExposure;
+ public float previousFramePreExposure;
+ public float tanHalfFOV;
+ public float jitterPhaseCount;
+ public float deltaTime;
+ public float dynamicResChangeFactor;
+ public float viewSpaceToMetersFactor;
+
+ public float padding; // NOTE(review): rounds the struct up to 128 bytes, presumably for constant-buffer alignment; confirm against the shader side
+ }
+
+ [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: fields are laid out in declaration order, so do not reorder them casually
+ internal struct SpdConstants
+ {
+ public uint mips;
+ public uint numWorkGroups;
+ public uint workGroupOffsetX, workGroupOffsetY;
+ public uint renderSizeX, renderSizeY;
+ }
+
+ [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: fields are laid out in declaration order, so do not reorder them casually
+ internal struct GenerateReactiveConstants // NOTE(review): presumably filled from GenerateReactiveDescription (Scale, CutoffThreshold, BinaryValue, Flags); confirm in AsrContext
+ {
+ public float scale;
+ public float threshold;
+ public float binaryValue;
+ public uint flags;
+ }
+
+ [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: fields are laid out in declaration order, so do not reorder them casually
+ internal struct RcasConstants
+ {
+ public RcasConstants(uint sharpness, uint halfSharp)
+ {
+ this.sharpness = sharpness;
+ this.halfSharp = halfSharp;
+ dummy0 = dummy1 = 0; // the two filler words are always zero
+ }
+
+ public readonly uint sharpness;
+ public readonly uint halfSharp;
+ public readonly uint dummy0; // unused filler; with dummy1 it brings the struct to four 32-bit words
+ public readonly uint dummy1; // unused filler
+ }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta
new file mode 100644
index 0000000..e36c358
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/Asr.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: c7350363c6d8a2b4096a9ed97dc4ed95
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs
new file mode 100644
index 0000000..1d65342
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs
@@ -0,0 +1,136 @@
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// Scriptable object containing all shader resources required by Arm Accuracy Super Resolution (ASR).
+ /// These can be stored in an asset file and referenced from a scene or prefab, avoiding the need to load the shaders from a Resources folder.
+ /// </summary>
+ [CreateAssetMenu(fileName = "ASR Assets", menuName = "ARM/ASR Assets", order = 1102)]
+ public class AsrAssets : ScriptableObject
+ {
+     public AsrShaderBundle shaderBundle;
+
+#if UNITY_EDITOR
+     /// <summary>
+     /// Unity's Reset message (editor only): rebuilds the bundle by searching the asset database for each shader by name.
+     /// A shader that cannot be found is left as null.
+     /// </summary>
+     private void Reset()
+     {
+         shaderBundle = new AsrShaderBundle
+         {
+             legacyShaders = new AsrShaders
+             {
+                 fragmentShader = FindAsset<Shader>("ffxm_fsr2_fragment_legacy"),
+                 computeLuminancePyramidPass = FindAsset<ComputeShader>("ffxm_fsr2_luma_pyramid_legacy"),
+                 lockPass = FindAsset<ComputeShader>("ffxm_fsr2_lock_legacy"),
+             },
+             modernShaders = new AsrShaders
+             {
+                 fragmentShader = FindAsset<Shader>("ffxm_fsr2_fragment_modern"),
+                 computeLuminancePyramidPass = FindAsset<ComputeShader>("ffxm_fsr2_luma_pyramid_modern"),
+                 lockPass = FindAsset<ComputeShader>("ffxm_fsr2_lock_modern"),
+             },
+         };
+     }
+
+     /// <summary>
+     /// Searches the asset database for an asset of type T and loads the first hit.
+     /// </summary>
+     /// <typeparam name="T">Asset type; its type name is used as the "t:" search filter.</typeparam>
+     /// <param name="name">Name text to search for.</param>
+     /// <returns>The first asset found, or null when the search returns nothing.</returns>
+     private static T FindAsset<T>(string name) where T : UnityEngine.Object
+     {
+         string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:{typeof(T).Name} {name}");
+         if (assetGuids == null || assetGuids.Length == 0)
+             return null;
+         // LoadAssetAtPath needs to be told the asset type; there is no path-only overload.
+         string assetPath = UnityEditor.AssetDatabase.GUIDToAssetPath(assetGuids[0]);
+         return UnityEditor.AssetDatabase.LoadAssetAtPath<T>(assetPath);
+     }
+#endif
+ }
+
+ /// <summary>Holds the legacy and modern ASR shader sets and selects one based on the active graphics API.</summary>
+ [System.Serializable]
+ public class AsrShaderBundle
+ {
+     public AsrShaders legacyShaders;
+
+     public AsrShaders modernShaders;
+
+     /// <summary>
+     /// Returns modernShaders on Direct3D 12, Vulkan, Metal, PlayStation 5 (both variants) and Xbox Series GameCore; legacyShaders on every other graphics device type.
+     /// </summary>
+     public AsrShaders GetShadersForCurrentPlatform()
+     {
+         GraphicsDeviceType api = SystemInfo.graphicsDeviceType;
+         bool isModernApi = api == GraphicsDeviceType.Direct3D12
+             || api == GraphicsDeviceType.Vulkan
+             || api == GraphicsDeviceType.Metal
+             || api == GraphicsDeviceType.PlayStation5
+             || api == GraphicsDeviceType.PlayStation5NGGC
+             || api == GraphicsDeviceType.GameCoreXboxSeries;
+         return isModernApi ? modernShaders : legacyShaders;
+     }
+ }
+
+ /// <summary>
+ /// All the shaders used by ASR: one combined fragment shader plus the compute shaders.
+ /// </summary>
+ [System.Serializable]
+ public class AsrShaders
+ {
+     /// <summary>
+     /// Combined shader file containing all non-compute passes.
+     /// </summary>
+     public Shader fragmentShader;
+
+     /// <summary>
+     /// The compute shader used by the luminance pyramid computation pass.
+     /// </summary>
+     public ComputeShader computeLuminancePyramidPass;
+
+     /// <summary>
+     /// The compute shader used by the lock pass.
+     /// </summary>
+     public ComputeShader lockPass;
+
+     /// <summary>
+     /// Returns a shallow copy: a new AsrShaders instance that references the same shader objects.
+     /// </summary>
+     public AsrShaders Clone()
+     {
+         return (AsrShaders)MemberwiseClone();
+     }
+
+     /// <summary>
+     /// Returns a copy of this class with clones of all its shaders.
+     /// This can be useful if you're running multiple ASR instances with different shader configurations.
+     /// Be sure to clean up these clones through Dispose once you're done with them.
+     /// </summary>
+     public AsrShaders DeepCopy()
+     {
+         return new AsrShaders
+         {
+             fragmentShader = Object.Instantiate(fragmentShader),
+             computeLuminancePyramidPass = Object.Instantiate(computeLuminancePyramidPass),
+             lockPass = Object.Instantiate(lockPass),
+         };
+     }
+
+     /// <summary>
+     /// Destroy all the shaders within this instance through Asr.DestroyObject, so it also works in the editor outside play mode; null entries are skipped.
+     /// Use this only on clones created through DeepCopy.
+     /// </summary>
+     public void Dispose()
+     {
+         Asr.DestroyObject(fragmentShader);
+         Asr.DestroyObject(computeLuminancePyramidPass);
+         Asr.DestroyObject(lockPass);
+     }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta
new file mode 100644
index 0000000..022b8ab
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrAssets.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 7a41695239eb36740847744b34c5af43
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs
new file mode 100644
index 0000000..505767b
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs
@@ -0,0 +1,61 @@
+using UnityEngine;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// A collection of callbacks required by the ASR process.
+ /// This allows some customization by the game dev on how to integrate ASR upscaling into their own game setup.
+ /// </summary>
+ public interface IAsrCallbacks
+ {
+ /// <summary>
+ /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers.
+ /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures.
+ /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays.
+ ///
+ /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied.
+ /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand.
+ /// </summary>
+ void ApplyMipmapBias(float biasOffset);
+ /// <summary>Revert the mipmap bias that was previously applied through ApplyMipmapBias.</summary>
+ void UndoMipmapBias();
+ }
+
+ /// <summary>
+ /// Default implementation of IAsrCallbacks.
+ /// These are fine for testing but a proper game will want to extend and override these methods.
+ /// </summary>
+ public class AsrCallbacksBase: IAsrCallbacks
+ {
+     /// <summary>Sum of every offset passed to ApplyMipmapBias so far; UndoMipmapBias applies its negation.</summary>
+     protected float CurrentBiasOffset = 0;
+
+     /// <summary>
+     /// Adds the offset to the mipmap bias of every Texture2D that has more than one mip level; NaN and infinite offsets are ignored.
+     /// </summary>
+     public virtual void ApplyMipmapBias(float biasOffset)
+     {
+         if (float.IsNaN(biasOffset) || float.IsInfinity(biasOffset))
+             return;
+
+         // Snap accumulated rounding error to exactly zero so UndoMipmapBias can early-out.
+         CurrentBiasOffset += biasOffset;
+         if (Mathf.Approximately(CurrentBiasOffset, 0f))
+             CurrentBiasOffset = 0f;
+
+         // The element type must be given explicitly; FindObjectsOfTypeAll cannot be called without one.
+         foreach (var texture in Resources.FindObjectsOfTypeAll<Texture2D>())
+         {
+             if (texture.mipmapCount > 1)
+                 texture.mipMapBias += biasOffset;
+         }
+     }
+
+     /// <summary>Reverts the accumulated bias; does nothing when no bias is outstanding.</summary>
+     public virtual void UndoMipmapBias()
+     {
+         if (CurrentBiasOffset != 0f)
+             ApplyMipmapBias(-CurrentBiasOffset);
+     }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta
new file mode 100644
index 0000000..2724280
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrCallbacks.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 78f16fcb80e6325429dfa567a4ed5d4a
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs
new file mode 100644
index 0000000..e436142
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs
@@ -0,0 +1,539 @@
+using System;
+using System.Runtime.InteropServices;
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// This class loosely matches the FfxFsr2Context struct from the original FSR2 codebase.
+ /// It manages the various resources and compute passes required by the ASR process.
+ /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers.
+ /// This should make it suitable for integration with any of the available Unity render pipelines.
+ /// </summary>
+ public class AsrContext
+ {
+ private const int MaxQueuedFrames = 16; // _resourceFrameIndex wraps around at this value (see Dispatch)
+
+ private Asr.ContextDescription _contextDescription;
+ // Pass objects: all except _tcrAutogeneratePass are instantiated in CreatePasses; Destroy releases every one that is non-null
+ private AsrPass _computeLuminancePyramidPass;
+ private AsrPass _reconstructPreviousDepthPass;
+ private AsrPass _depthClipPass;
+ private AsrPass _lockPass;
+ private AsrPass _accumulatePass;
+ private AsrPass _sharpenPass;
+ private AsrPass _generateReactivePass;
+ private AsrPass _tcrAutogeneratePass;
+
+ private readonly AsrResources _resources = new AsrResources();
+ private readonly AsrKeywords _keywords = new AsrKeywords();
+ // Each constants struct lives in a one-element array that is uploaded with CommandBuffer.SetBufferData; the ref property aliases element 0
+ private ComputeBuffer _upscalerConstantsBuffer;
+ private readonly Asr.UpscalerConstants[] _upscalerConstantsArray = { new Asr.UpscalerConstants() };
+ private ref Asr.UpscalerConstants UpscalerConsts => ref _upscalerConstantsArray[0];
+
+ private ComputeBuffer _spdConstantsBuffer;
+ private readonly Asr.SpdConstants[] _spdConstantsArray = { new Asr.SpdConstants() };
+ private ref Asr.SpdConstants SpdConsts => ref _spdConstantsArray[0];
+
+ private ComputeBuffer _rcasConstantsBuffer;
+ private readonly Asr.RcasConstants[] _rcasConstantsArray = new Asr.RcasConstants[1];
+ private ref Asr.RcasConstants RcasConsts => ref _rcasConstantsArray[0];
+
+ private ComputeBuffer _generateReactiveConstantsBuffer;
+ private readonly Asr.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Asr.GenerateReactiveConstants() };
+ private ref Asr.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0];
+ // State carried across Dispatch calls
+ private bool _firstExecution;
+ private Vector2 _previousJitterOffset;
+ private int _resourceFrameIndex;
+
+        /// <summary>Stores the context description (always adding EnableFP16Usage), then creates the constant buffers, resources and passes.</summary>
+        public void Create(in Asr.ContextDescription contextDescription)
+        {
+            _contextDescription = contextDescription;
+            _contextDescription.Flags |= Asr.InitializationFlags.EnableFP16Usage; // Always force FP16 code path
+
+            _upscalerConstantsBuffer = CreateConstantBuffer<Asr.UpscalerConstants>();
+            _spdConstantsBuffer = CreateConstantBuffer<Asr.SpdConstants>();
+            _rcasConstantsBuffer = CreateConstantBuffer<Asr.RcasConstants>();
+            _generateReactiveConstantsBuffer = CreateConstantBuffer<Asr.GenerateReactiveConstants>();
+            // Set defaults
+            _firstExecution = true;
+            _resourceFrameIndex = 0;
+            UpscalerConsts.displaySize = _contextDescription.DisplaySize;
+
+            _resources.Create(_contextDescription);
+            CreatePasses();
+        }
+
+        /// <summary>Instantiates the pass objects; the constant buffers are passed to their constructors, so Create allocates those first.</summary>
+        private void CreatePasses()
+        {
+            _computeLuminancePyramidPass = new AsrComputeLuminancePyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer);
+            _reconstructPreviousDepthPass = new AsrReconstructPreviousDepthPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _depthClipPass = new AsrDepthClipPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _lockPass = new AsrLockPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _accumulatePass = new AsrAccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _sharpenPass = new AsrSharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer);
+            _generateReactivePass = new AsrGenerateReactivePass(_contextDescription, _resources, _upscalerConstantsBuffer, _generateReactiveConstantsBuffer);
+        }
+
+ public void Destroy()
+ {
+ DestroyPass(ref _tcrAutogeneratePass); // never assigned in CreatePasses; DestroyPass ignores null
+ DestroyPass(ref _generateReactivePass);
+ DestroyPass(ref _sharpenPass);
+ DestroyPass(ref _accumulatePass);
+ DestroyPass(ref _lockPass);
+ DestroyPass(ref _depthClipPass);
+ DestroyPass(ref _reconstructPreviousDepthPass);
+ DestroyPass(ref _computeLuminancePyramidPass);
+ // Passes are released first, then the shared resources, and finally the constant buffers allocated in Create
+ _resources.Destroy();
+
+ DestroyConstantBuffer(ref _generateReactiveConstantsBuffer);
+ DestroyConstantBuffer(ref _rcasConstantsBuffer);
+ DestroyConstantBuffer(ref _spdConstantsBuffer);
+ DestroyConstantBuffer(ref _upscalerConstantsBuffer);
+ }
+
+ public void Dispatch(Asr.DispatchDescription dispatchParams, CommandBuffer commandBuffer)
+ {
+ if ((_contextDescription.Flags & Asr.InitializationFlags.EnableDebugChecking) != 0)
+ {
+ DebugCheckDispatch(dispatchParams);
+ }
+ // Per-dispatch setup: shader keywords, plus aliasable resources that are destroyed again at the end of this method
+ if (dispatchParams.UseTextureArrays)
+ commandBuffer.EnableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY");
+
+ _keywords.ApplyKeywords(commandBuffer, _contextDescription.Variant, _contextDescription.Flags, dispatchParams);
+
+ AsrResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams);
+
+ if (_firstExecution)
+ {
+ commandBuffer.SetRenderTarget(_resources.LockStatus[0]);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+ commandBuffer.SetRenderTarget(_resources.LockStatus[1]);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+ commandBuffer.SetRenderTarget(AsrShaderIDs.UavPreparedInputColor);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+ }
+ // frameIndex alternates between 0 and 1 on successive dispatches; frameIndex ^ 1 addresses the other slot
+ int frameIndex = _resourceFrameIndex % 2;
+ bool resetAccumulation = dispatchParams.Reset || _firstExecution;
+ _firstExecution = false;
+
+ // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends
+ if ((_contextDescription.Flags & Asr.InitializationFlags.EnableAutoExposure) != 0)
+ dispatchParams.Exposure = new ResourceView(_resources.AutoExposure[frameIndex]);
+ else if (!dispatchParams.Exposure.IsValid)
+ dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure);
+
+ if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive);
+ if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive);
+
+ SetupConstants(dispatchParams, resetAccumulation);
+
+ // Thread group counts handed to the lock pass: render size divided by 8, rounded up
+ const int threadGroupWorkRegionDim = 8;
+ int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+ int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+ bool applyPerfModeOptimizations = _contextDescription.Variant == Asr.Variant.Performance;
+ bool applyBalancedModeOptimizations = _contextDescription.Variant == Asr.Variant.Balanced;
+ bool isBalancedOrPerformance = applyBalancedModeOptimizations || applyPerfModeOptimizations;
+
+ // On reset, clear the other frame slot's history targets plus the shared luminance, exposure and atomic counter targets
+ if (resetAccumulation)
+ {
+ commandBuffer.SetRenderTarget(_resources.LockStatus[frameIndex ^ 1]);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+
+ commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+
+ if (isBalancedOrPerformance)
+ {
+ commandBuffer.SetRenderTarget(_resources.InternalReactive[frameIndex ^ 1]);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+ }
+
+ commandBuffer.SetRenderTarget(_resources.SceneLuminance);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+
+ // Auto exposure always used to track luma changes in locking logic
+ commandBuffer.SetRenderTarget(_resources.AutoExposure[frameIndex ^ 1]);
+ commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f));
+
+ // Reset atomic counter to 0
+ commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+ }
+
+ // Need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option
+ bool depthInverted = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) == Asr.InitializationFlags.EnableDepthInverted;
+ commandBuffer.SetRenderTarget(AsrShaderIDs.UavReconstructedPrevNearestDepth);
+ commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white);
+
+ commandBuffer.SetRenderTarget(AsrShaderIDs.UavNewLocks);
+ commandBuffer.ClearRenderTarget(false, true, Color.clear);
+
+ // SPD constants and thread group counts for the luminance pyramid pass (auto exposure)
+ SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount);
+
+ // Initialize constant buffers data
+ commandBuffer.SetBufferData(_upscalerConstantsBuffer, _upscalerConstantsArray);
+ commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray);
+
+ // Compute luminance pyramid
+ _computeLuminancePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y);
+
+ // Reconstruct previous depth
+ _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex);
+
+ // Depth clip
+ _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex);
+
+ // Create locks
+ _lockPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+ // Accumulate
+ _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex);
+
+ if (dispatchParams.EnableSharpening)
+ {
+ // Compute the constants
+ SetupRcasConstants(dispatchParams);
+ commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray);
+
+ // Dispatch RCAS
+ _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex);
+ }
+
+ _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames;
+
+ AsrResources.DestroyAliasableResources(commandBuffer);
+
+ commandBuffer.DisableShaderKeyword("UNITY_FFXM_TEXTURE2D_X_ARRAY"); // disabled unconditionally, even when UseTextureArrays did not enable it above
+ }
+
+ public void GenerateReactiveMask(in Asr.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer)
+ {
+ GenReactiveConsts.scale = dispatchParams.Scale;
+ GenReactiveConsts.threshold = dispatchParams.CutoffThreshold;
+ GenReactiveConsts.binaryValue = dispatchParams.BinaryValue;
+ GenReactiveConsts.flags = (uint)dispatchParams.Flags;
+ commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray);
+ // The field is typed as AsrPass; the cast presumably reaches an overload taking a GenerateReactiveDescription (declared outside this file, confirm)
+ ((AsrGenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams);
+ }
+
+ private void SetupConstants(in Asr.DispatchDescription dispatchParams, bool resetAccumulation)
+ {
+ ref Asr.UpscalerConstants constants = ref UpscalerConsts;
+ // Updates the upscaler constants in place; preExposure, jitterPhaseCount and frameIndex build on the values left by the previous dispatch
+ constants.jitterOffset = dispatchParams.JitterOffset;
+ constants.renderSize = dispatchParams.RenderSize;
+ constants.maxRenderSize = _contextDescription.MaxRenderSize;
+ constants.inputColorResourceDimensions = dispatchParams.InputResourceSize;
+
+ // Compute the horizontal FOV for the shader from the vertical one
+ float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y;
+ float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f;
+ constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f);
+ constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? dispatchParams.ViewSpaceToMetersFactor : 1.0f;
+
+ // Compute params to enable device depth to view space depth computation in shader
+ constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams);
+
+ // To be updated if resource is larger than the actual image size
+ constants.downscaleFactor = new Vector2((float)constants.renderSize.x / _contextDescription.DisplaySize.x, (float)constants.renderSize.y / _contextDescription.DisplaySize.y);
+ constants.previousFramePreExposure = constants.preExposure;
+ constants.preExposure = (dispatchParams.PreExposure != 0) ? dispatchParams.PreExposure : 1.0f;
+
+ // Motion vector data
+ Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.displaySize : constants.renderSize;
+ constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize;
+
+ // Compute jitter cancellation
+ if ((_contextDescription.Flags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0)
+ {
+ constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize;
+ _previousJitterOffset = constants.jitterOffset;
+ }
+
+ int jitterPhaseCount = Asr.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.DisplaySize.x);
+ if (resetAccumulation || constants.jitterPhaseCount == 0)
+ {
+ constants.jitterPhaseCount = jitterPhaseCount;
+ }
+ else
+ {
+ int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount);
+ if (jitterPhaseCountDelta > 0)
+ constants.jitterPhaseCount++;
+ else if (jitterPhaseCountDelta < 0)
+ constants.jitterPhaseCount--;
+ }
+
+ // Clamp delta time to [0, 1]; no unit conversion happens here (DebugCheckDispatch expects the value in seconds)
+ constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta);
+
+ if (resetAccumulation)
+ constants.frameIndex = 0;
+ else
+ constants.frameIndex++;
+
+ // Shading change usage of the SPD mip levels
+ constants.lumaMipLevelToUse = AsrPass.ShadingChangeMipLevel;
+
+ float mipDiv = 2 << constants.lumaMipLevelToUse; // 2^(level + 1)
+ constants.lumaMipDimensions.x = (int)(constants.maxRenderSize.x / mipDiv);
+ constants.lumaMipDimensions.y = (int)(constants.maxRenderSize.y / mipDiv);
+ }
+
+        /// <summary>
+        /// Builds the four parameters used to convert device depth to view space depth in the shaders.
+        /// x and y depend on the EnableDepthInverted/EnableDepthInfinite flags; z and w are aspect / cot(fovY / 2) and 1 / cot(fovY / 2).
+        /// </summary>
+        private Vector4 SetupDeviceDepthToViewSpaceDepthParams(in Asr.DispatchDescription dispatchParams)
+        {
+            var flags = _contextDescription.Flags;
+            bool inverted = (flags & Asr.InitializationFlags.EnableDepthInverted) != 0;
+            bool infinite = (flags & Asr.InitializationFlags.EnableDepthInfinite) != 0;
+
+            // Sort the two planes so that swapped near/far inputs make no difference, then flip them for inverted depth.
+            float lower = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar);
+            float upper = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar);
+            float min = inverted ? upper : lower;
+            float max = inverted ? lower : upper;
+            float q = max / (min - max);
+
+            // Pick the two projection terms for the active combination of the inverted and infinite flags.
+            float elemC, elemE;
+            if (!infinite)
+                (elemC, elemE) = (q, q * min);
+            else if (inverted)
+                (elemC, elemE) = (0.0f + Mathf.Epsilon, max);
+            else
+                (elemC, elemE) = (-1.0f - Mathf.Epsilon, -min - Mathf.Epsilon);
+
+            // Revert x and y coords
+            float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y;
+            float halfFovY = 0.5f * dispatchParams.CameraFovAngleVertical;
+            float cotHalfFovY = Mathf.Cos(halfFovY) / Mathf.Sin(halfFovY);
+            return new Vector4(-1.0f * elemC, elemE, aspect / cotHalfFovY, 1.0f / cotHalfFovY);
+        }
+
+        /// <summary>Selects the precomputed RCAS constants nearest to the requested sharpness, which is clamped to [0, 1].</summary>
+        private void SetupRcasConstants(in Asr.DispatchDescription dispatchParams)
+        {
+            RcasConsts = RcasConfigs[Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1))];
+        }
+
+        /// <summary>Fills the SPD constants for the current render size and outputs the thread group counts to dispatch.</summary>
+        private void SetupSpdConstants(in Asr.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount)
+        {
+            var renderSize = dispatchParams.RenderSize;
+            SpdSetup(new RectInt(0, 0, renderSize.x, renderSize.y), out dispatchThreadGroupCount, out Vector2Int groupOffset, out Vector2Int groupsAndMips);
+
+            // Downsample
+            SpdConsts.numWorkGroups = (uint)groupsAndMips.x;
+            SpdConsts.mips = (uint)groupsAndMips.y;
+            SpdConsts.workGroupOffsetX = (uint)groupOffset.x;
+            SpdConsts.workGroupOffsetY = (uint)groupOffset.y;
+            SpdConsts.renderSizeX = (uint)renderSize.x;
+            SpdConsts.renderSizeY = (uint)renderSize.y;
+        }
+
+        /// <summary>Derives the thread group counts, work group offset and work group/mip counts for downsampling <paramref name="rectInfo"/>, in units of 64 pixels.</summary>
+        /// <param name="mips">Mip count to report; a negative value derives it as floor(log2(longer side)), capped at 12.</param>
+        private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1)
+        {
+            const int GroupSpan = 64;
+            int firstX = rectInfo.x / GroupSpan, firstY = rectInfo.y / GroupSpan;
+            int lastX = (rectInfo.x + rectInfo.width - 1) / GroupSpan, lastY = (rectInfo.y + rectInfo.height - 1) / GroupSpan;
+
+            workGroupOffset = new Vector2Int(firstX, firstY);
+            dispatchThreadGroupCount = new Vector2Int(lastX + 1 - firstX, lastY + 1 - firstY);
+
+            int mipCount = mips;
+            if (mipCount < 0)
+                mipCount = Math.Min(Mathf.FloorToInt(Mathf.Log(Math.Max(rectInfo.width, rectInfo.height), 2.0f)), 12);
+            numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mipCount);
+        }
+
+ private void DebugCheckDispatch(in Asr.DispatchDescription dispatchParams)
+ {
+ if (!dispatchParams.Color.IsValid)
+ {
+ Debug.LogError("Color resource is null");
+ }
+
+ if (!dispatchParams.Depth.IsValid)
+ {
+ Debug.LogError("Depth resource is null");
+ }
+
+ if (!dispatchParams.MotionVectors.IsValid)
+ {
+ Debug.LogError("MotionVectors resource is null");
+ }
+
+ if (!dispatchParams.Output.IsValid)
+ {
+ Debug.LogError("Output resource is null");
+ }
+ // Parameter sanity checks: everything in this method only logs, Dispatch carries on regardless of the outcome
+ if (dispatchParams.Exposure.IsValid && (_contextDescription.Flags & Asr.InitializationFlags.EnableAutoExposure) != 0)
+ {
+ Debug.LogWarning("Exposure resource provided, however auto exposure flag is present");
+ }
+
+ if (Mathf.Abs(dispatchParams.JitterOffset.x) > 1.0f || Mathf.Abs(dispatchParams.JitterOffset.y) > 1.0f)
+ {
+ Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]");
+ }
+
+ if (dispatchParams.MotionVectorScale.x > _contextDescription.MaxRenderSize.x || dispatchParams.MotionVectorScale.y > _contextDescription.MaxRenderSize.y)
+ {
+ Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize");
+ }
+
+ if (dispatchParams.MotionVectorScale.x == 0.0f || dispatchParams.MotionVectorScale.y == 0.0f)
+ {
+ Debug.LogWarning("MotionVectorScale contains zero scale value");
+ }
+
+ if (dispatchParams.RenderSize.x > _contextDescription.MaxRenderSize.x || dispatchParams.RenderSize.y > _contextDescription.MaxRenderSize.y)
+ {
+ Debug.LogWarning("RenderSize is greater than context MaxRenderSize");
+ }
+
+ if (dispatchParams.RenderSize.x == 0 || dispatchParams.RenderSize.y == 0)
+ {
+ Debug.LogWarning("RenderSize contains zero dimension");
+ }
+
+ if (dispatchParams.FrameTimeDelta > 1.0f)
+ {
+ Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)");
+ }
+
+ if (dispatchParams.PreExposure == 0.0f)
+ {
+ Debug.LogError("PreExposure provided as 0.0f which is invalid");
+ }
+ // The depth plane checks below depend on the inverted/infinite depth flags of the context
+ bool infiniteDepth = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInfinite) != 0;
+ bool inverseDepth = (_contextDescription.Flags & Asr.InitializationFlags.EnableDepthInverted) != 0;
+
+ if (inverseDepth)
+ {
+ if (dispatchParams.CameraNear < dispatchParams.CameraFar)
+ {
+ Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar");
+ }
+
+ if (infiniteDepth)
+ {
+ if (dispatchParams.CameraNear < float.MaxValue)
+ {
+ Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue");
+ }
+ }
+
+ if (dispatchParams.CameraFar < 0.075f)
+ {
+ Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting");
+ }
+ }
+ else
+ {
+ if (dispatchParams.CameraNear > dispatchParams.CameraFar)
+ {
+ Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context");
+ }
+
+ if (infiniteDepth)
+ {
+ if (dispatchParams.CameraFar < float.MaxValue)
+ {
+ Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue");
+ }
+ }
+
+ if (dispatchParams.CameraNear < 0.075f)
+ {
+ Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting");
+ }
+ }
+ // The vertical field of view is checked against (0, PI], i.e. it is treated as an angle in radians
+ if (dispatchParams.CameraFovAngleVertical <= 0.0f)
+ {
+ Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f");
+ }
+
+ if (dispatchParams.CameraFovAngleVertical > Mathf.PI)
+ {
+ Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI");
+ }
+ }
+
+ /// <summary>
+ /// The ASR C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader.
+ /// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values, indexed by SetupRcasConstants. Each entry holds the bit pattern of a 32-bit float followed by the same value packed as two 16-bit halves.
+ /// </summary>
+ private static readonly Asr.RcasConstants[] RcasConfigs = new []
+ {
+ new Asr.RcasConstants(1048576000u, 872428544u), // 0.25f (0x3E800000), halves 0x3400
+ new Asr.RcasConstants(1049178080u, 877212745u),
+ new Asr.RcasConstants(1049823372u, 882390168u),
+ new Asr.RcasConstants(1050514979u, 887895276u),
+ new Asr.RcasConstants(1051256227u, 893859143u),
+ new Asr.RcasConstants(1052050675u, 900216232u),
+ new Asr.RcasConstants(1052902144u, 907032080u),
+ new Asr.RcasConstants(1053814727u, 914306687u),
+ new Asr.RcasConstants(1054792807u, 922105590u),
+ new Asr.RcasConstants(1055841087u, 930494326u),
+ new Asr.RcasConstants(1056964608u, 939538432u), // 0.5f (0x3F000000), halves 0x3800
+ new Asr.RcasConstants(1057566688u, 944322633u),
+ new Asr.RcasConstants(1058211980u, 949500056u),
+ new Asr.RcasConstants(1058903587u, 955005164u),
+ new Asr.RcasConstants(1059644835u, 960969031u),
+ new Asr.RcasConstants(1060439283u, 967326120u),
+ new Asr.RcasConstants(1061290752u, 974141968u),
+ new Asr.RcasConstants(1062203335u, 981416575u),
+ new Asr.RcasConstants(1063181415u, 989215478u),
+ new Asr.RcasConstants(1064229695u, 997604214u),
+ new Asr.RcasConstants(1065353216u, 1006648320), // 1.0f (0x3F800000), halves 0x3C00
+ };
+
+        /// <summary>Creates a single-element constant buffer whose stride is the marshalled size of <typeparamref name="TConstants"/>.</summary>
+        /// <typeparam name="TConstants">Constants struct the buffer will hold.</typeparam>
+        private static ComputeBuffer CreateConstantBuffer<TConstants>() where TConstants: struct
+            => new ComputeBuffer(1, Marshal.SizeOf<TConstants>(), ComputeBufferType.Constant);
+
+        /// <summary>Releases the compute buffer if one is assigned and clears the reference.</summary>
+        /// <param name="bufferRef">Field holding the buffer; it is null afterwards.</param>
+        private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef)
+        {
+            // ComputeBuffer is not a UnityEngine.Object, so the null-conditional call behaves like an explicit null check.
+            bufferRef?.Release();
+            bufferRef = null;
+        }
+
+        /// <summary>Disposes the pass if one is assigned and clears the reference.</summary>
+        /// <param name="pass">Field holding the pass; it is null afterwards.</param>
+        private static void DestroyPass(ref AsrPass pass)
+        {
+            // AsrPass is an ordinary IDisposable class, so the null-conditional call behaves like an explicit null check.
+            pass?.Dispose();
+            pass = null;
+        }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs.meta
new file mode 100644
index 0000000..3a18521
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrContext.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: c348b7c44539db74994c5846caec5871
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs
new file mode 100644
index 0000000..43de5e1
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs
@@ -0,0 +1,126 @@
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+    /// <summary>
+    /// Translates the ASR variant, initialization flags and per-dispatch options into shader keywords on a command buffer.
+    /// On Unity 2021.2 and newer the keywords are resolved once into GlobalKeyword handles; older versions pass the keyword names.
+    /// </summary>
+    public class AsrKeywords
+    {
+        private static readonly string OptionHalfPrecision = "FFXM_HALF";
+        private static readonly string OptionHdrColorInput = "FFXM_FSR2_OPTION_HDR_COLOR_INPUT";
+        private static readonly string OptionLowResolutionMotionVectors = "FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS";
+        private static readonly string OptionJitteredMotionVectors = "FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS";
+        private static readonly string OptionInvertedDepth = "FFXM_FSR2_OPTION_INVERTED_DEPTH";
+        private static readonly string OptionReprojectUseLut = "FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE";
+        private static readonly string OptionApplySharpening = "FFXM_FSR2_OPTION_APPLY_SHARPENING";
+        private static readonly string OptionBalancedPreset = "FFXM_FSR2_OPTION_SHADER_OPT_BALANCED";
+        private static readonly string OptionPerformancePreset = "FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE";
+
+#if UNITY_2021_2_OR_NEWER
+        private readonly GlobalKeyword _halfPrecisionKeyword;
+        private readonly GlobalKeyword _hdrColorInputKeyword;
+        private readonly GlobalKeyword _lowResMotionVectorsKeyword;
+        private readonly GlobalKeyword _jitteredMotionVectorsKeyword;
+        private readonly GlobalKeyword _invertedDepthKeyword;
+        private readonly GlobalKeyword _reprojectUseLutKeyword;
+        private readonly GlobalKeyword _applySharpeningKeyword;
+        private readonly GlobalKeyword _balancedPresetKeyword;
+        private readonly GlobalKeyword _performancePresetKeyword;
+#endif
+
+        /// <summary>Creates the keyword set; on Unity 2021.2 and newer this resolves every keyword name to a GlobalKeyword.</summary>
+        public AsrKeywords()
+        {
+#if UNITY_2021_2_OR_NEWER
+            _halfPrecisionKeyword = GlobalKeyword.Create(OptionHalfPrecision);
+            _hdrColorInputKeyword = GlobalKeyword.Create(OptionHdrColorInput);
+            _lowResMotionVectorsKeyword = GlobalKeyword.Create(OptionLowResolutionMotionVectors);
+            _jitteredMotionVectorsKeyword = GlobalKeyword.Create(OptionJitteredMotionVectors);
+            _invertedDepthKeyword = GlobalKeyword.Create(OptionInvertedDepth);
+            _reprojectUseLutKeyword = GlobalKeyword.Create(OptionReprojectUseLut);
+            _applySharpeningKeyword = GlobalKeyword.Create(OptionApplySharpening);
+            _balancedPresetKeyword = GlobalKeyword.Create(OptionBalancedPreset);
+            _performancePresetKeyword = GlobalKeyword.Create(OptionPerformancePreset);
+#endif
+        }
+
+        /// <summary>
+        /// Records an enable or disable command for every ASR keyword so the shader permutation matches the given configuration.
+        /// </summary>
+        /// <param name="commandBuffer">Command buffer that receives the keyword commands.</param>
+        /// <param name="variant">Quality preset; a value other than Quality, Balanced or Performance leaves the preset keywords untouched.</param>
+        /// <param name="initFlags">Context initialization flags that drive the FP16, HDR, motion vector and depth keywords.</param>
+        /// <param name="dispatchParams">Per-dispatch settings; only EnableSharpening is read here.</param>
+        public void ApplyKeywords(CommandBuffer commandBuffer, Asr.Variant variant, Asr.InitializationFlags initFlags, in Asr.DispatchDescription dispatchParams)
+        {
+            bool useLut = false;
+#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+
+            useLut = SystemInfo.computeSubGroupSize == 64;
+#endif
+
+            // This matches the permutation rules from the CreatePipeline* functions
+            bool halfPrecision = (initFlags & Asr.InitializationFlags.EnableFP16Usage) != 0;
+            bool hdrColorInput = (initFlags & Asr.InitializationFlags.EnableHighDynamicRange) != 0;
+            bool lowResMotionVectors = (initFlags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0;
+            bool jitteredMotionVectors = (initFlags & Asr.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0;
+            bool invertedDepth = (initFlags & Asr.InitializationFlags.EnableDepthInverted) != 0;
+            bool applySharpening = dispatchParams.EnableSharpening;
+
+            // The performance preset builds on the balanced one, so it turns both preset keywords on.
+            bool performancePreset = variant == Asr.Variant.Performance;
+            bool balancedPreset = performancePreset || variant == Asr.Variant.Balanced;
+            bool knownVariant = balancedPreset || variant == Asr.Variant.Quality;
+
+#if UNITY_2021_2_OR_NEWER
+            Toggle(commandBuffer, _halfPrecisionKeyword, halfPrecision);
+            Toggle(commandBuffer, _hdrColorInputKeyword, hdrColorInput);
+            Toggle(commandBuffer, _lowResMotionVectorsKeyword, lowResMotionVectors);
+            Toggle(commandBuffer, _jitteredMotionVectorsKeyword, jitteredMotionVectors);
+            Toggle(commandBuffer, _invertedDepthKeyword, invertedDepth);
+            Toggle(commandBuffer, _reprojectUseLutKeyword, useLut);
+            Toggle(commandBuffer, _applySharpeningKeyword, applySharpening);
+            if (knownVariant)
+            {
+                Toggle(commandBuffer, _balancedPresetKeyword, balancedPreset);
+                Toggle(commandBuffer, _performancePresetKeyword, performancePreset);
+            }
+#else
+            Toggle(commandBuffer, OptionHalfPrecision, halfPrecision);
+            Toggle(commandBuffer, OptionHdrColorInput, hdrColorInput);
+            Toggle(commandBuffer, OptionLowResolutionMotionVectors, lowResMotionVectors);
+            Toggle(commandBuffer, OptionJitteredMotionVectors, jitteredMotionVectors);
+            Toggle(commandBuffer, OptionInvertedDepth, invertedDepth);
+            Toggle(commandBuffer, OptionReprojectUseLut, useLut);
+            Toggle(commandBuffer, OptionApplySharpening, applySharpening);
+            if (knownVariant)
+            {
+                Toggle(commandBuffer, OptionBalancedPreset, balancedPreset);
+                Toggle(commandBuffer, OptionPerformancePreset, performancePreset);
+            }
+#endif
+        }
+
+#if UNITY_2021_2_OR_NEWER
+        /// <summary>Records an enable or disable command for a global keyword handle.</summary>
+        private static void Toggle(CommandBuffer commandBuffer, GlobalKeyword keyword, bool enabled)
+        {
+            if (enabled)
+                commandBuffer.EnableKeyword(keyword);
+            else
+                commandBuffer.DisableKeyword(keyword);
+        }
+#else
+        /// <summary>Records an enable or disable command for a keyword given by name.</summary>
+        private static void Toggle(CommandBuffer commandBuffer, string keyword, bool enabled)
+        {
+            if (enabled)
+                commandBuffer.EnableShaderKeyword(keyword);
+            else
+                commandBuffer.DisableShaderKeyword(keyword);
+        }
+#endif
+    }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta
new file mode 100644
index 0000000..5b51256
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrKeywords.cs.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: a2636bdd1878444fb3d3475610d379df
+timeCreated: 1742641520
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs
new file mode 100644
index 0000000..b5abd87
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs
@@ -0,0 +1,338 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using UnityEngine;
+using UnityEngine.Profiling;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// Base class for all the compute and fragment passes that make up the ASR process.
+ /// This loosely matches the FfxPipelineState struct from the original ASR codebase, wrapped in an object-oriented blanket.
+ /// These classes are responsible for loading shaders, managing temporary resources, binding resources to shader kernels or materials, and dispatching said shaders.
+ /// </summary>
+    internal abstract class AsrPass: IDisposable
+    {
+        internal const int ShadingChangeMipLevel = 4; // This matches the FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL define
+
+        protected readonly Asr.ContextDescription ContextDescription; // Copy of the description passed to the constructor
+        protected readonly AsrResources Resources; // Persistent textures shared by the passes
+        protected readonly ComputeBuffer Constants; // Bound as the CbFsr2 constant buffer by the derived passes
+
+        protected ComputeShader ComputeShader; // Assigned by InitComputeShader
+        protected int KernelIndex; // Index of the "main" kernel, assigned by InitComputeShader
+
+        protected Material FragmentMaterial; // Created by InitFragmentShader, destroyed in Dispose
+        protected int FragmentPass; // Shader pass index used when drawing with FragmentMaterial
+        protected MaterialPropertyBlock FragmentProperties; // Per-pass properties handed to DrawProcedural
+
+        private CustomSampler _sampler; // Profiler sampler created by either Init* method
+
+        protected AsrPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
+        {
+            Constants = constants;
+            Resources = resources;
+            ContextDescription = contextDescription;
+        }
+
+        public virtual void Dispose()
+        {
+            // The material only exists for passes that went through InitFragmentShader.
+            if (FragmentMaterial == null)
+                return;
+            Asr.DestroyObject(FragmentMaterial);
+            FragmentMaterial = null;
+        }
+
+        public void ScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX = 0, int dispatchY = 0)
+        {
+            BeginSample(commandBuffer); // call is compiled out unless ENABLE_PROFILER is defined
+            DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); // pass-specific recording
+            EndSample(commandBuffer); // call is compiled out unless ENABLE_PROFILER is defined
+        }
+
+        protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY);
+
+        protected void InitComputeShader(string passName, ComputeShader shader)
+        {
+            // Reject a missing shader up front with a message that names the pass.
+            if (shader == null)
+                throw new MissingReferenceException($"Shader for ASR pass '{passName}' could not be loaded! Please ensure it is included in the project correctly.");
+
+            // Keep the shader, its entry kernel (looked up by the fixed name "main") and a profiler sampler named after the pass.
+            ComputeShader = shader;
+            KernelIndex = shader.FindKernel("main");
+            _sampler = CustomSampler.Create(passName);
+        }
+
+        protected void InitFragmentShader(string passName, Shader shader, int passNumber)
+        {
+            // Reject a missing shader up front with a message that names the pass.
+            if (shader == null)
+                throw new MissingReferenceException($"Shader for ASR pass '{passName}' could not be loaded! Please ensure it is included in the project correctly.");
+
+            // Each pass object gets its own material instance and property block; passNumber selects the shader pass to draw.
+            FragmentMaterial = new Material(shader);
+            FragmentPass = passNumber;
+            FragmentProperties = new MaterialPropertyBlock();
+            _sampler = CustomSampler.Create(passName);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected void BlitFragment(CommandBuffer commandBuffer, RenderTargetIdentifier renderTarget)
+        {
+            commandBuffer.SetRenderTarget(renderTarget); // single colour target
+            BlitFragment(commandBuffer);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected void BlitFragment(CommandBuffer commandBuffer, RenderTargetIdentifier[] renderTargets)
+        {
+            commandBuffer.SetRenderTarget(renderTargets, renderTargets[0]); // multiple colour targets; the first one is also passed as the depth argument
+            BlitFragment(commandBuffer);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void BlitFragment(CommandBuffer commandBuffer)
+        {
+            commandBuffer.DrawProcedural(Matrix4x4.identity, FragmentMaterial, FragmentPass, MeshTopology.Triangles, 3, 1, FragmentProperties); // one procedural triangle: 3 vertices, 1 instance, no mesh
+        }
+
+        [Conditional("ENABLE_PROFILER"), MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected void BeginSample(CommandBuffer cmd)
+        {
+            cmd.BeginSample(_sampler); // opens the profiler sample for this pass on the command buffer
+        }
+
+        [Conditional("ENABLE_PROFILER"), MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected void EndSample(CommandBuffer cmd)
+        {
+            cmd.EndSample(_sampler); // closes the sample opened by BeginSample
+        }
+    }
+
+ internal class AsrComputeLuminancePyramidPass : AsrPass // Compute pass: binds the colour input, exposure buffers and luminance mips, then dispatches the kernel.
+ {
+ private readonly ComputeBuffer _spdConstants; // Bound to CbSpd in addition to the shared Constants buffer
+
+ public AsrComputeLuminancePyramidPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer spdConstants)
+ : base(contextDescription, resources, constants)
+ {
+ _spdConstants = spdConstants;
+
+ InitComputeShader("Compute Luminance Pyramid", contextDescription.Shaders.computeLuminancePyramidPass);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ commandBuffer.SetComputeResourceParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvInputColor, dispatchParams.Color);
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex ^ 1]); // read side: the other slot of the two-element buffer (assumes frameIndex is 0 or 1)
+
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter);
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel); // binds mip level ShadingChangeMipLevel (4) only
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5); // binds mip level 5 only
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavAutoExposure, Resources.AutoExposure[frameIndex]); // write side: this frame's slot
+
+ commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbSpd, _spdConstants, 0, _spdConstants.stride);
+
+ commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); // thread-group counts in X/Y come from the caller
+ }
+ }
+
+ internal class AsrReconstructPreviousDepthPass : AsrPass // Fragment pass (shader pass 1) drawing into three colour targets plus one random-write target.
+ {
+ private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[3]; // Reused across dispatches; refilled in DoScheduleDispatch
+
+ public AsrReconstructPreviousDepthPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
+ : base(contextDescription, resources, constants)
+ {
+ InitFragmentShader("Reconstruct & Dilate", contextDescription.Shaders.fragmentShader, 1);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure);
+
+ // UAV binding in fragment shader, index needs to match the register binding in HLSL
+ commandBuffer.SetRandomWriteTarget(3, AsrShaderIDs.UavReconstructedPrevNearestDepth);
+
+ _mrt[0] = AsrShaderIDs.RtDilatedDepth; // fDepth
+ _mrt[1] = AsrShaderIDs.RtLockInputLuma; // fLuma
+ _mrt[2] = Resources.DilatedMotionVectors[frameIndex]; // fMotionVector
+
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ BlitFragment(commandBuffer, _mrt);
+ commandBuffer.ClearRandomWriteTargets(); // unbinds the random-write target set above once the draw is recorded
+ }
+ }
+
+ internal class AsrDepthClipPass : AsrPass // Fragment pass (shader pass 2) drawing into two colour targets.
+ {
+ private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2]; // Reused across dispatches; refilled in DoScheduleDispatch
+
+ public AsrDepthClipPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
+ : base(contextDescription, resources, constants)
+ {
+ InitFragmentShader("Depth Clip", contextDescription.Shaders.fragmentShader, 2);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.Color);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputDepth, dispatchParams.Depth);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvReactiveMask, dispatchParams.Reactive);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvTransparencyAndCompositionMask, dispatchParams.TransparencyAndComposition);
+
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth); // temporary RT, read here through its Srv name
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); // this frame's slot
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedDepth, AsrShaderIDs.UavDilatedDepth); // temporary RT, read here through its Srv name
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); // the other slot (assumes frameIndex is 0 or 1)
+
+ _mrt[0] = AsrShaderIDs.RtPreparedInputColor; // fTonemapped
+ _mrt[1] = AsrShaderIDs.RtDilatedReactiveMasks; // fDilatedReactiveMasks
+
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ BlitFragment(commandBuffer, _mrt);
+ }
+ }
+
+ internal class AsrLockPass : AsrPass // Compute pass: reads the lock input luma and writes the new-locks and reconstructed-depth targets.
+ {
+ public AsrLockPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
+ : base(contextDescription, resources, constants)
+ {
+ InitComputeShader("Create Locks", contextDescription.Shaders.lockPass);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.SrvLockInputLuma, AsrShaderIDs.UavLockInputLuma); // temporary RT, read here through its Srv name
+
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavNewLocks, AsrShaderIDs.UavNewLocks); // the same ID names both the shader property and the temporary RT
+ commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, AsrShaderIDs.UavReconstructedPrevNearestDepth, AsrShaderIDs.UavReconstructedPrevNearestDepth);
+
+ commandBuffer.SetComputeConstantBufferParam(ComputeShader, AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+
+ commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); // thread-group counts in X/Y come from the caller
+ }
+ }
+
+ internal class AsrAccumulatePass : AsrPass // Fragment pass (shader pass 3) drawing into four colour targets; the target layout depends on the variant.
+ {
+ private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[4]; // Reused across dispatches; refilled in DoScheduleDispatch
+
+ public AsrAccumulatePass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants)
+ : base(contextDescription, resources, constants)
+ {
+ InitFragmentShader("Reproject & Accumulate", contextDescription.Shaders.fragmentShader, 3);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ if ((ContextDescription.Flags & Asr.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) // flag clear: bind the internal dilated motion vectors
+ {
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
+ }
+ else // flag set: bind the caller-supplied motion vectors instead
+ {
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputMotionVectors, dispatchParams.MotionVectors);
+ }
+
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure);
+
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvDilatedReactiveMasks, AsrShaderIDs.UavDilatedReactiveMasks);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); // read side: the other slot (assumes frameIndex is 0 or 1)
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]); // read side: the other slot
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvPreparedInputColor, AsrShaderIDs.UavPreparedInputColor);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLanczosLut, Resources.LanczosLut);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvAutoExposure, Resources.AutoExposure[frameIndex]); // this frame's slot
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvNewLocks, AsrShaderIDs.UavNewLocks);
+
+ if (ContextDescription.Variant == Asr.Variant.Quality) // Quality: luma history is read and written; no temporal reactive buffer is bound
+ {
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]);
+
+ _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fColorAndWeight
+ _mrt[1] = Resources.LockStatus[frameIndex]; // fLockStatus
+ _mrt[2] = Resources.LumaHistory[frameIndex]; // fLumaHistory
+ }
+ else // other variants: temporal reactive buffer is read and written; note the different target order
+ {
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvInternalTemporalReactive, Resources.InternalReactive[frameIndex ^ 1]);
+
+ _mrt[0] = Resources.InternalUpscaled[frameIndex]; // fUpscaledColor
+ _mrt[1] = Resources.InternalReactive[frameIndex]; // fTemporalReactive
+ _mrt[2] = Resources.LockStatus[frameIndex]; // fLockStatus
+ }
+
+ _mrt[3] = dispatchParams.EnableSharpening ? BuiltinRenderTextureType.None : dispatchParams.Output.RenderTarget; // fColor
+
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ BlitFragment(commandBuffer, _mrt);
+ commandBuffer.ClearRandomWriteTargets(); // NOTE(review): no SetRandomWriteTarget call is visible in this pass - confirm this clear is still needed
+ }
+ }
+
+ internal class AsrSharpenPass : AsrPass // Fragment pass (shader pass 4): reads this frame's InternalUpscaled slot and draws into the output target.
+ {
+ private readonly ComputeBuffer _rcasConstants; // Bound to CbRcas in addition to the shared Constants buffer
+
+ public AsrSharpenPass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants)
+ : base(contextDescription, resources, constants)
+ {
+ _rcasConstants = rcasConstants;
+
+ InitFragmentShader("RCAS Sharpening", contextDescription.Shaders.fragmentShader, 4);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ {
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputExposure, dispatchParams.Exposure);
+ commandBuffer.SetGlobalTexture(AsrShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); // the slot AsrAccumulatePass renders into for this frameIndex
+
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbRcas, _rcasConstants, 0, _rcasConstants.stride);
+ BlitFragment(commandBuffer, dispatchParams.Output.RenderTarget); // single colour target
+ }
+ }
+
+ internal class AsrGenerateReactivePass : AsrPass // Fragment pass (shader pass 0) recorded through its own ScheduleDispatch overload.
+ {
+ private readonly ComputeBuffer _generateReactiveConstants; // Bound to CbGenReactive in addition to the shared Constants buffer
+
+ public AsrGenerateReactivePass(in Asr.ContextDescription contextDescription, AsrResources resources, ComputeBuffer constants, ComputeBuffer generateReactiveConstants)
+ : base(contextDescription, resources, constants)
+ {
+ _generateReactiveConstants = generateReactiveConstants;
+
+ InitFragmentShader("Auto-Generate Reactive Mask", contextDescription.Shaders.fragmentShader, 0);
+ }
+
+ protected override void DoScheduleDispatch(CommandBuffer commandBuffer, in Asr.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+ { // No-op: the work is recorded by the ScheduleDispatch(GenerateReactiveDescription) overload below.
+ }
+
+ public void ScheduleDispatch(CommandBuffer commandBuffer, in Asr.GenerateReactiveDescription dispatchParams)
+ {
+ BeginSample(commandBuffer); // call is compiled out unless ENABLE_PROFILER is defined
+
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvOpaqueOnly, dispatchParams.ColorOpaqueOnly);
+ commandBuffer.SetGlobalResource(AsrShaderIDs.SrvInputColor, dispatchParams.ColorPreUpscale);
+
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbFsr2, Constants, 0, Constants.stride);
+ FragmentProperties.SetConstantBuffer(AsrShaderIDs.CbGenReactive, _generateReactiveConstants, 0, _generateReactiveConstants.stride);
+ BlitFragment(commandBuffer, dispatchParams.OutReactive.RenderTarget); // single colour target
+
+ EndSample(commandBuffer); // call is compiled out unless ENABLE_PROFILER is defined
+ }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs.meta
new file mode 100644
index 0000000..5b01d20
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrPass.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 7fb53d9f929886c4ab35be8d9010b9c3
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs
new file mode 100644
index 0000000..31e6634
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs
@@ -0,0 +1,227 @@
+using System;
+using UnityEngine;
+using UnityEngine.Experimental.Rendering;
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// Helper class for bundling and managing persistent resources required by the ASR process.
+ /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames.
+ /// </summary>
+ internal class AsrResources
+ {
+ public Texture2D DefaultExposure;
+ public Texture2D DefaultReactive;
+ public Texture2D LanczosLut;
+ public Texture2D MaximumBiasLut;
+ public RenderTexture SpdAtomicCounter;
+ public RenderTexture SceneLuminance;
+ public readonly RenderTexture[] AutoExposure = new RenderTexture[2];
+ public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2];
+ public readonly RenderTexture[] LockStatus = new RenderTexture[2];
+ public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2];
+ public readonly RenderTexture[] InternalReactive = new RenderTexture[2];
+ public readonly RenderTexture[] LumaHistory = new RenderTexture[2];
+
+ public void Create(in Asr.ContextDescription contextDescription) // Allocates the lookup tables, default textures and persistent render textures.
+ {
+ // Generate the data for the LUT
+ const int lanczos2LutWidth = 128;
+ float[] lanczos2Weights = new float[lanczos2LutWidth];
+ for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex)
+ {
+ float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1); // x runs from 0 (first entry) to 2 (last entry)
+ float y = Asr.Lanczos2(x);
+ lanczos2Weights[currentLanczosWidthIndex] = y;
+ }
+
+ float[] maximumBias = new float[MaximumBiasTextureWidth * MaximumBiasTextureHeight];
+ for (int i = 0; i < maximumBias.Length; ++i)
+ {
+ maximumBias[i] = MaximumBias[i] / 2.0f; // table values are at most 2.0, so the halved values are at most 1.0
+ }
+
+ GetFormatRequirements(contextDescription, out bool isBalancedOrPerformance, out _, out _, out GraphicsFormat r16Format, out GraphicsFormat rg16Format);
+
+ // Resource FSR2_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE
+ // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data.
+ LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "ASR_LanczosLutData" };
+ LanczosLut.SetPixelData(lanczos2Weights, 0);
+ LanczosLut.Apply();
+
+ // Resource FSR2_MaximumUpsampleBias: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE
+ MaximumBiasLut = new Texture2D(MaximumBiasTextureWidth, MaximumBiasTextureHeight, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "ASR_MaximumUpsampleBias" };
+ MaximumBiasLut.SetPixelData(maximumBias, 0);
+ MaximumBiasLut.Apply();
+
+ // Resource FSR2_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE
+ DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "ASR_DefaultExposure" };
+ DefaultExposure.SetPixel(0, 0, Color.clear);
+ DefaultExposure.Apply();
+
+ // Resource FSR2_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE
+ DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "ASR_DefaultReactivityMask" };
+ DefaultReactive.SetPixel(0, 0, Color.clear);
+ DefaultReactive.Apply();
+
+ // Resource FSR2_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE
+ // Despite what the original FSR2 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal.
+ SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "ASR_SpdAtomicCounter", enableRandomWrite = true };
+ SpdAtomicCounter.Create();
+
+ // Resource FSR2_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+ // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this.
+ int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; // half of the maximum render size in each dimension
+ int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f)); // 1 + floor(log2(larger dimension))
+ SceneLuminance = new RenderTexture(w, h, 0, r16Format, mipCount) { name = "ASR_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false };
+ SceneLuminance.Create();
+
+ // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(AutoExposure, "ASR_AutoExposure", Vector2Int.one, rg16Format, enableRandomWrite: true); // NOTE(review): allocated with rg16Format, not the R32G32_FLOAT named above - confirm intended
+
+ // Resources FSR2_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(DilatedMotionVectors, "ASR_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat);
+
+ // Resources FSR2_LockStatus1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(LockStatus, "ASR_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat);
+
+ // Resources FSR2_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(InternalUpscaled, "ASR_InternalUpscaled", contextDescription.DisplaySize, !isBalancedOrPerformance ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.B10G11R11_UFloatPack32); // packed 32-bit format for balanced/performance
+
+ // Additional textures used by either balanced or performance presets
+ if (isBalancedOrPerformance)
+ {
+ // Resources FSR2_InternalReactive1/2: FFXM_RESOURCE_USAGE_RENDERTARGET, FFXM_SURFACE_FORMAT_R8_SNORM, FFXM_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(InternalReactive, "ASR_InternalReactive", contextDescription.DisplaySize, GraphicsFormat.R8_SNorm);
+ }
+ else // Quality preset specific
+ {
+ // Resources FSR2_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE
+ CreateDoubleBufferedResource(LumaHistory, "ASR_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm);
+ }
+ }
+
+ // Set up shared aliasable resources, i.e. temporary render textures
+ // These do not need to persist between frames, but they do need to be available between passes
+ public static void CreateAliasableResources(CommandBuffer commandBuffer, in Asr.ContextDescription contextDescription, in Asr.DispatchDescription dispatchParams) // NOTE(review): dispatchParams is not read in this method
+ {
+ Vector2Int displaySize = contextDescription.DisplaySize;
+ Vector2Int maxRenderSize = contextDescription.MaxRenderSize;
+
+ GetFormatRequirements(contextDescription, out _, out bool preparedInputColorNeedsFp16, out GraphicsFormat r8Format, out _, out _);
+
+ // FSR2_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); // trailing true enables random write
+
+ // FSR2_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1);
+
+ // FSR2_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1);
+
+ // FSR2_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1);
+
+ // FSR2_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, preparedInputColorNeedsFp16 ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R8G8B8A8_UNorm, 1); // 8-bit format when FP16 is not required
+
+ // FSR2_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE
+ commandBuffer.GetTemporaryRT(AsrShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, r8Format, 1, true); // display-sized, unlike the render-sized targets above; random write enabled
+ }
+
+        private static void GetFormatRequirements(in Asr.ContextDescription contextDescription,
+            out bool isBalancedOrPerformance, out bool preparedInputColorNeedsFP16,
+            out GraphicsFormat r8Format, out GraphicsFormat r16Format, out GraphicsFormat rg16Format)
+        {
+            // Balanced and Performance share one group; only Performance gives up the FP16 prepared input colour.
+            var variant = contextDescription.Variant;
+            isBalancedOrPerformance = variant == Asr.Variant.Balanced || variant == Asr.Variant.Performance;
+            preparedInputColorNeedsFP16 = variant != Asr.Variant.Performance;
+
+            // OpenGLES 3.2 specific: We need to work around some GLES limitations for some resources, so wider formats are substituted there.
+            bool useWiderFormats = SystemInfo.graphicsDeviceType == GraphicsDeviceType.OpenGLES3;
+            r8Format = useWiderFormats ? GraphicsFormat.R32_SFloat : GraphicsFormat.R8_UNorm;
+            r16Format = useWiderFormats ? GraphicsFormat.R32_SFloat : GraphicsFormat.R16_SFloat;
+            rg16Format = useWiderFormats ? GraphicsFormat.R16G16B16A16_SFloat : GraphicsFormat.R16G16_SFloat;
+        }
+
+        public static void DestroyAliasableResources(CommandBuffer commandBuffer)
+        {
+            void Release(int id) => commandBuffer.ReleaseTemporaryRT(id); // Release all the aliasable resources used this frame
+            Release(AsrShaderIDs.UavReconstructedPrevNearestDepth);
+            Release(AsrShaderIDs.UavDilatedDepth);
+            Release(AsrShaderIDs.UavLockInputLuma);
+            Release(AsrShaderIDs.UavDilatedReactiveMasks);
+            Release(AsrShaderIDs.UavPreparedInputColor);
+            Release(AsrShaderIDs.UavNewLocks);
+        }
+
+        private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format, bool enableRandomWrite = false)
+        {
+            for (int slot = 0; slot < 2; ++slot) // fills both slots; texture names get a 1-based suffix (name1, name2)
+            {
+                resource[slot] = new RenderTexture(size.x, size.y, 0, format) { name = $"{name}{slot + 1}", enableRandomWrite = enableRandomWrite };
+                resource[slot].Create();
+            }
+        }
+
+        public void Destroy()
+        {
+            // Releases every persistent resource allocated by Create(). SpdAtomicCounter is a persistent
+            // RenderTexture created there as well, so it must be released here too or it leaks.
+            // Entries that were never created (the variant-specific buffers) are null and are skipped by the helpers.
+            foreach (RenderTexture[] doubleBuffered in new[] { LumaHistory, InternalReactive, InternalUpscaled, LockStatus, DilatedMotionVectors, AutoExposure })
+                DestroyResource(doubleBuffered);
+            DestroyResource(ref SpdAtomicCounter);
+            DestroyResource(ref SceneLuminance);
+            DestroyResource(ref DefaultReactive);
+            DestroyResource(ref DefaultExposure);
+            DestroyResource(ref MaximumBiasLut);
+            DestroyResource(ref LanczosLut);
+        }
+
+        private static void DestroyResource(ref Texture2D resource)
+        {
+            Asr.DestroyObject(resource); // no null check here; any null handling is left to Asr.DestroyObject
+            resource = null;
+        }
+
+        private static void DestroyResource(ref RenderTexture resource)
+        {
+            if (resource != null)
+            {
+                resource.Release(); // frees the GPU-side resources of the render texture
+                resource = null;
+            }
+        }
+
+        private static void DestroyResource(RenderTexture[] resource)
+        {
+            for (int slot = 0; slot < resource.Length; ++slot)
+                DestroyResource(ref resource[slot]); // by ref, so the array slot itself is cleared
+        }
+
+ private const int MaximumBiasTextureWidth = 16;
+ private const int MaximumBiasTextureHeight = 16;
+ private static readonly float[] MaximumBias =
+ {
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.976f, 1.841f, 1.774f, 1.737f, 1.716f, 1.71f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.914f, 1.784f, 1.716f, 1.673f, 1.649f, 1.641f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.793f, 1.676f, 1.604f, 1.562f, 1.54f, 1.533f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.619f, 1.536f, 1.492f, 1.467f, 1.454f, 1.449f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.575f, 1.496f, 1.456f, 1.432f, 1.416f, 1.408f, 1.405f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.555f, 1.479f, 1.438f, 1.413f, 1.398f, 1.387f, 1.381f, 1.379f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.555f, 1.474f, 1.43f, 1.404f, 1.387f, 1.376f, 1.368f, 1.363f, 1.362f,
+ 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.575f, 1.479f, 1.43f, 1.401f, 1.382f, 1.369f, 1.36f, 1.354f, 1.351f, 1.35f,
+ 2.0f, 2.0f, 1.976f, 1.914f, 1.793f, 1.619f, 1.496f, 1.438f, 1.404f, 1.382f, 1.367f, 1.357f, 1.349f, 1.344f, 1.341f, 1.34f,
+ 1.876f, 1.869f, 1.841f, 1.784f, 1.676f, 1.536f, 1.456f, 1.413f, 1.387f, 1.369f, 1.357f, 1.347f, 1.341f, 1.336f, 1.333f, 1.332f,
+ 1.809f, 1.801f, 1.774f, 1.716f, 1.604f, 1.492f, 1.432f, 1.398f, 1.376f, 1.36f, 1.349f, 1.341f, 1.335f, 1.33f, 1.328f, 1.327f,
+ 1.772f, 1.764f, 1.737f, 1.673f, 1.562f, 1.467f, 1.416f, 1.387f, 1.368f, 1.354f, 1.344f, 1.336f, 1.33f, 1.326f, 1.323f, 1.323f,
+ 1.753f, 1.745f, 1.716f, 1.649f, 1.54f, 1.454f, 1.408f, 1.381f, 1.363f, 1.351f, 1.341f, 1.333f, 1.328f, 1.323f, 1.321f, 1.32f,
+ 1.748f, 1.739f, 1.71f, 1.641f, 1.533f, 1.449f, 1.405f, 1.379f, 1.362f, 1.35f, 1.34f, 1.332f, 1.327f, 1.323f, 1.32f, 1.319f,
+ };
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta
new file mode 100644
index 0000000..b0f5f23
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrResources.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 538f6eefa95c8ee4d9f6a9bc4bb3188e
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs
new file mode 100644
index 0000000..9b1cd1f
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs
@@ -0,0 +1,70 @@
+using UnityEngine;
+
+namespace ArmASR
+{
+ public static class AsrShaderIDs
+ {
+ // Shader resource views, i.e. read-only bindings (all shader property names here carry the r_ prefix)
+ public static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered");
+ public static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only");
+ public static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors");
+ public static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth");
+ public static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure");
+ public static readonly int SrvAutoExposure = Shader.PropertyToID("r_auto_exposure");
+ public static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask");
+ public static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask");
+ public static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth");
+ public static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors");
+ public static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors");
+ public static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilatedDepth");
+ public static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color");
+ public static readonly int SrvInternalTemporalReactive = Shader.PropertyToID("r_internal_temporal_reactive");
+ public static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status");
+ public static readonly int SrvNewLocks = Shader.PropertyToID("r_new_locks");
+ public static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma");
+ public static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color");
+ public static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history");
+ public static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input");
+ public static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut");
+ public static readonly int SrvSceneLuminanceMips = Shader.PropertyToID("r_imgMips");
+ public static readonly int SrvUpscaleMaximumBiasLut = Shader.PropertyToID("r_upsample_maximum_bias_lut");
+ public static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks");
+
+ // Unordered access views, i.e. random read/write bindings (all shader property names here carry the rw_ prefix)
+ public static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth");
+ public static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors");
+ public static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilatedDepth");
+ public static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color");
+ public static readonly int UavLockStatus = Shader.PropertyToID("rw_lock_status");
+ public static readonly int UavLockInputLuma = Shader.PropertyToID("rw_lock_input_luma");
+ public static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks");
+ public static readonly int UavPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color");
+ public static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history");
+ public static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output");
+ public static readonly int UavExposureMipLumaChange = Shader.PropertyToID("rw_img_mip_shading_change");
+ public static readonly int UavExposureMip5 = Shader.PropertyToID("rw_img_mip_5");
+ public static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks");
+ public static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure");
+ public static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic");
+ public static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive");
+
+ // Render textures, i.e. output targets for fragment shaders; all but RtInternalTemporalReactive reuse the rw_ name of a Uav entry above
+ public static readonly int RtInternalUpscalerColor = Shader.PropertyToID("rw_internal_upscaled_color");
+ public static readonly int RtInternalTemporalReactive = Shader.PropertyToID("rw_internal_temporal_reactive");
+ public static readonly int RtLockStatus = Shader.PropertyToID("rw_lock_status");
+ public static readonly int RtLumaHistory = Shader.PropertyToID("rw_luma_history");
+ public static readonly int RtUpscaledOutput = Shader.PropertyToID("rw_upscaled_output");
+ public static readonly int RtDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks");
+ public static readonly int RtPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color");
+ public static readonly int RtDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors");
+ public static readonly int RtDilatedDepth = Shader.PropertyToID("rw_dilatedDepth");
+ public static readonly int RtLockInputLuma = Shader.PropertyToID("rw_lock_input_luma");
+ public static readonly int RtAutoReactive = Shader.PropertyToID("rw_output_autoreactive");
+
+ // Constant buffer bindings (names prefixed with cb)
+ public static readonly int CbFsr2 = Shader.PropertyToID("cbFSR2");
+ public static readonly int CbSpd = Shader.PropertyToID("cbSPD");
+ public static readonly int CbRcas = Shader.PropertyToID("cbRCAS");
+ public static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive");
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta
new file mode 100644
index 0000000..c65dbb6
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/AsrShaderIDs.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: e0173241f8bd75e419590b43a3739e0e
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs
new file mode 100644
index 0000000..f0d6be1
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs
@@ -0,0 +1,35 @@
+using UnityEngine.Rendering;
+
+namespace ArmASR
+{
+ /// <summary>
+ /// An immutable structure wrapping all the necessary information to bind a specific buffer or attachment of a render target to a compute shader.
+ /// </summary>
+ public readonly struct ResourceView
+ {
+ /// <summary>
+ /// This value is the equivalent of not setting any value at all; all struct fields will have their default values.
+ /// It does not refer to a valid texture, therefore any variable set to this value should be checked for IsValid and reassigned before being bound to a shader.
+ /// </summary>
+ public static readonly ResourceView Unassigned = new ResourceView(default);
+
+ /// <summary>
+ /// This value contains a valid texture reference that can be bound to a shader, however it is just an empty placeholder texture.
+ /// Binding this to a shader can be seen as setting the texture variable inside the shader to null.
+ /// </summary>
+ public static readonly ResourceView None = new ResourceView(BuiltinRenderTextureType.None);
+
+ public ResourceView(in RenderTargetIdentifier renderTarget, RenderTextureSubElement subElement = RenderTextureSubElement.Default, int mipLevel = 0)
+ {
+ RenderTarget = renderTarget;
+ SubElement = subElement;
+ MipLevel = mipLevel;
+ }
+
+ public bool IsValid => !RenderTarget.Equals(default); // False only when RenderTarget still equals its default value, as in Unassigned
+
+ public readonly RenderTargetIdentifier RenderTarget;
+ public readonly RenderTextureSubElement SubElement;
+ public readonly int MipLevel;
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta
new file mode 100644
index 0000000..f7e1122
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Runtime/ResourceView.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 6e2e3cd4f5c3d4146b6fe3f93685751b
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta
new file mode 100644
index 0000000..57ef699
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: d231e3fb22497e3448f149c48709d08d
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc
new file mode 100644
index 0000000..5db4b7a
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc
@@ -0,0 +1,84 @@
+#pragma warning(disable: 3078) // Loop control variable conflicts
+#pragma warning(disable: 3203) // Signed/unsigned mismatch
+#pragma warning(disable: 3205) // Conversion from larger type to smaller, possible loss of data
+#pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible
+
+// If these keywords are set by Unity, redefine them to have a truthy value
+#if defined(FFXM_HALF)
+#undef FFXM_HALF
+#define FFXM_HALF (1)
+#endif
+#ifdef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+#undef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+#define FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE 1
+#endif
+#ifdef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+#undef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+#define FFXM_FSR2_OPTION_SHADER_OPT_BALANCED 1
+#endif
+
+// Ensure the correct value is defined for this keyword (FFXM_ prefix, as declared by the including shaders' multi_compile pragmas), as it is used to select one of multiple sampler functions
+#ifdef FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#undef FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1
+#endif
+
+// ASR has some special code paths for OpenGL ES 3.2
+#if defined(SHADER_API_GLES3)
+#define FFXM_SHADER_PLATFORM_GLES_3_2 (1)
+#define unorm
+#endif
+
+// Work around the lack of texture atomics on Metal by substituting plain, non-atomic read-modify-write macros
+#if defined(SHADER_API_METAL)
+#define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); }
+#define InterlockedMin(dest, val) { (dest) = min((dest), (val)); }
+#define InterlockedMax(dest, val) { (dest) = max((dest), (val)); }
+#endif
+
+// PSSL uses different semantics and doesn't support certain type qualifiers
+#if defined(SHADER_API_PSSL)
+#define SV_VERTEXID S_VERTEX_ID
+#define SV_POSITION S_POSITION
+#define SV_TARGET0 S_TARGET_OUTPUT0
+#define SV_TARGET1 S_TARGET_OUTPUT1
+#define SV_TARGET2 S_TARGET_OUTPUT2
+#define SV_TARGET3 S_TARGET_OUTPUT3
+#define unorm
+#define globallycoherent
+#endif
+
+// Workaround for HDRP using texture arrays for its camera buffers on some platforms
+// The below defines are adapted from: Packages/com.unity.render-pipelines.core/ShaderLibrary/TextureXR.hlsl
+#if ((defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12)) && !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_GAMECORE)) || defined(SHADER_API_PSSL) || defined(SHADER_API_VULKAN)
+ #define UNITY_TEXTURE2D_X_ARRAY_SUPPORTED
+#endif
+
+// Control if TEXTURE2D_X macros will expand to texture arrays
+#if defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) && defined(UNITY_FFXM_TEXTURE2D_X_ARRAY)
+ #define USE_TEXTURE2D_X_AS_ARRAY
+#endif
+
+// Early defines for single-pass instancing
+#if defined(STEREO_INSTANCING_ON) && defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED)
+ #define UNITY_STEREO_INSTANCING_ENABLED
+#endif
+
+// Helper macros to handle XR single-pass with Texture2DArray
+#if defined(USE_TEXTURE2D_X_AS_ARRAY)
+
+ // Only single-pass stereo instancing uses array indexing
+ #if defined(UNITY_STEREO_INSTANCING_ENABLED)
+ static uint unity_StereoEyeIndex;
+ #define SLICE_ARRAY_INDEX unity_StereoEyeIndex
+ #else
+ #define SLICE_ARRAY_INDEX 0
+ #endif
+
+ // Declare and sample camera buffers as texture arrays
+ #define UNITY_FSR_TEX2D(type) Texture2DArray
+ #define UNITY_FSR_RWTEX2D(type) RWTexture2DArray
+ #define UNITY_FSR_POS(pxPos) FfxUInt32x3(pxPos, SLICE_ARRAY_INDEX)
+ #define UNITY_FSR_UV(uv) FfxFloat32x3(uv, SLICE_ARRAY_INDEX)
+
+#endif
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta
new file mode 100644
index 0000000..2bfbafa
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_common.cginc.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 6836bc2e151d44e0bd1991fd7bfaee30
+timeCreated: 1742410813
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader
new file mode 100644
index 0000000..236b507
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader
@@ -0,0 +1,122 @@
+Shader "TND/ASR/ffxm_fsr2_fragment_legacy"
+{
+ SubShader
+ {
+ Cull Off ZWrite Off ZTest Always
+
+ Pass // 0
+ {
+ Name "Auto-Generate Reactive Mask"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 1
+ {
+ Name "Reconstruct Previous Depth"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 2
+ {
+ Name "Depth Clip"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 3
+ {
+ Name "Accumulate"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_APPLY_SHARPENING
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 4
+ {
+ Name "Sharpen"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_rcas_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta
new file mode 100644
index 0000000..4fc6554
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_legacy.shader.meta
@@ -0,0 +1,9 @@
+fileFormatVersion: 2
+guid: 42e5314e46109a441a4527349d8df6e4
+ShaderImporter:
+ externalObjects: {}
+ defaultTextures: []
+ nonModifiableTextures: []
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader
new file mode 100644
index 0000000..ec7f5a8
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader
@@ -0,0 +1,139 @@
+Shader "TND/ASR/ffxm_fsr2_fragment_modern"
+{
+ SubShader
+ {
+ Cull Off ZWrite Off ZTest Always
+
+ Pass // 0
+ {
+ Name "Auto-Generate Reactive Mask"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ #pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+ #pragma use_dxc
+ #pragma require Native16Bit
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 1
+ {
+ Name "Reconstruct Previous Depth"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ #pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+ #pragma use_dxc
+ #pragma require Native16Bit
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 2
+ {
+ Name "Depth Clip"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ #pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+ #pragma use_dxc
+ #pragma require Native16Bit
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 3
+ {
+ Name "Accumulate"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ #pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+ #pragma use_dxc
+ #pragma require Native16Bit
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ FFXM_HALF
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+ #pragma multi_compile __ FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+ #pragma multi_compile __ FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ #pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+ #pragma multi_compile __ FFXM_FSR2_OPTION_APPLY_SHARPENING
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_accumulate_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+
+ Pass // 4
+ {
+ Name "Sharpen"
+
+ HLSLPROGRAM
+ #pragma vertex VertMain
+ #pragma fragment main
+ #pragma target 4.5
+ #pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+ #pragma use_dxc
+ #pragma require Native16Bit
+ //#pragma enable_d3d11_debug_symbols
+
+ #pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+ #include "ffxm_fsr2_common.cginc"
+ #include "shaders/ffxm_fsr2_vs.hlsl"
+ #include "shaders/ffxm_fsr2_rcas_pass_fs.hlsl"
+
+ ENDHLSL
+ }
+ }
+
+ Fallback "TND/ASR/ffxm_fsr2_fragment_legacy"
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader.meta
new file mode 100644
index 0000000..256f8a3
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_fragment_modern.shader.meta
@@ -0,0 +1,9 @@
+fileFormatVersion: 2
+guid: 147cc2cffac69ef4eb3ea8addafc9d10
+ShaderImporter:
+ externalObjects: {}
+ defaultTextures: []
+ nonModifiableTextures: []
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute
new file mode 100644
index 0000000..d7d43d4
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute
@@ -0,0 +1,13 @@
+#pragma kernel main
+//#pragma enable_d3d11_debug_symbols
+
+#pragma multi_compile __ FFXM_HALF
+#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+#include "ffxm_fsr2_common.cginc"
+
+#include "shaders/ffxm_fsr2_lock_pass.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta
new file mode 100644
index 0000000..2bfe598
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_legacy.compute.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: a09277df48840a84196b3bac299544ea
+timeCreated: 1742417134
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute
new file mode 100644
index 0000000..50138c7
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute
@@ -0,0 +1,15 @@
+#pragma kernel main
+#pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+#pragma use_dxc
+//#pragma enable_d3d11_debug_symbols
+
+#pragma multi_compile __ FFXM_HALF
+#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+#include "ffxm_fsr2_common.cginc"
+
+#include "shaders/ffxm_fsr2_lock_pass.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute.meta
new file mode 100644
index 0000000..1b473ab
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_lock_modern.compute.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: a6e1d5d5372d467790fcf2d089b50ef7
+timeCreated: 1742417134
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute
new file mode 100644
index 0000000..04c03ac
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute
@@ -0,0 +1,16 @@
+#pragma kernel main
+//#pragma enable_d3d11_debug_symbols
+
+#pragma multi_compile __ FFXM_HALF
+#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+#include "ffxm_fsr2_common.cginc"
+
+// Disable wave operations altogether
+#define FFXM_SPD_NO_WAVE_OPERATIONS
+
+#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta
new file mode 100644
index 0000000..e102c9d
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_legacy.compute.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 41d0c3a77d97a904e96ebc2bf18129f6
+timeCreated: 1742416757
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute
new file mode 100644
index 0000000..5e1e43a
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute
@@ -0,0 +1,24 @@
+#pragma kernel main
+#pragma only_renderers d3d11 vulkan metal ps5 xboxseries
+#pragma use_dxc
+//#pragma enable_d3d11_debug_symbols
+
+#pragma multi_compile __ FFXM_HALF
+#pragma multi_compile __ FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile __ FFXM_FSR2_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile __ UNITY_FFXM_TEXTURE2D_X_ARRAY
+
+#include "ffxm_fsr2_common.cginc"
+
+// Enable wave operations for the platforms that support it
+#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS) && !defined(SHADER_API_MOBILE)
+#pragma require WaveBasic // Required for WaveGetLaneIndex
+#pragma require WaveBallot // Required for WaveReadLaneAt
+#pragma require QuadShuffle // Required for QuadReadAcross
+#else
+#define FFXM_SPD_NO_WAVE_OPERATIONS
+#endif
+
+#include "shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute.meta
new file mode 100644
index 0000000..9989db6
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/ffxm_fsr2_luma_pyramid_modern.compute.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 57220d870cb441c8a6df8a9e15a74283
+timeCreated: 1742416757
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta
similarity index 77%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta
index 0d0996c..806a4d1 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders.meta
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: 81bb130e0ef32fa4fb623a65d2f2116e
+guid: f7cc575273c4b124596cac0be2abd8ff
folderAsset: yes
DefaultImporter:
externalObjects: {}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h
new file mode 100644
index 0000000..8628721
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h
@@ -0,0 +1,614 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_COMMON_TYPES_H
+#define FFXM_COMMON_TYPES_H
+
+#if defined(FFXM_CPU)
+#define FFXM_PARAMETER_IN
+#define FFXM_PARAMETER_OUT
+#define FFXM_PARAMETER_INOUT
+#define FFXM_PARAMETER_UNIFORM
+#elif defined(FFXM_HLSL)
+#define FFXM_PARAMETER_IN in
+#define FFXM_PARAMETER_OUT out
+#define FFXM_PARAMETER_INOUT inout
+#define FFXM_PARAMETER_UNIFORM uniform
+#elif defined(FFXM_GLSL)
+#define FFXM_PARAMETER_IN in
+#define FFXM_PARAMETER_OUT out
+#define FFXM_PARAMETER_INOUT inout
+#define FFXM_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found!
+#endif // #if defined(FFXM_CPU)
+
+#if defined(FFXM_CPU)
+/// A typedef for a boolean value.
+///
+/// @ingroup CPUTypes
+typedef bool FfxBoolean;
+
+/// A typedef for a unsigned 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint8_t FfxUInt8;
+
+/// A typedef for a unsigned 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint16_t FfxUInt16;
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32;
+
+/// A typedef for a unsigned 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint64_t FfxUInt64;
+
+/// A typedef for a signed 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef int8_t FfxInt8;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef int16_t FfxInt16;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32;
+
+/// A typedef for a signed 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef int64_t FfxInt64;
+
+/// A typedef for a floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x2[2];
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x3[3];
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x4[4];
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x4[4];
+#endif // #if defined(FFXM_CPU)
+
+#if defined(FFXM_HLSL)
+
+// Unless defined, go for the conservative option.
+#if !defined(FFXM_HLSL_6_2)
+#define FFXM_HLSL_6_2 (0)
+#endif
+// Explicitly sized float matrix aliases; a bare `matrix` would not encode the 3x3 / 4x4 dimensions.
+#define FfxFloat32Mat4 matrix <float, 4, 4>
+#define FfxFloat32Mat3 matrix <float, 3, 3>
+
+/// A typedef for a boolean value.
+///
+/// @ingroup HLSLTypes
+typedef bool FfxBoolean;
+
+#if FFXM_HLSL_6_2
+
+/// @defgroup HLSL62Types HLSL 6.2 And Above Types
+/// HLSL 6.2 and above type defines for all commonly used variables
+///
+/// @ingroup HLSLTypes
+
+/// A typedef for a floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t2 FfxFloat32x2;
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t3 FfxFloat32x3;
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t4 FfxFloat32x4;
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+typedef float4x4 FfxFloat32x4x4;
+typedef float3x3 FfxFloat32x3x3;
+typedef float2x2 FfxFloat32x2x2;
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t FfxUInt32;
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t2 FfxUInt32x2;
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t3 FfxUInt32x3;
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t4 FfxUInt32x4;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t FfxInt32;
+
+/// A typedef for a 2-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t2 FfxInt32x2;
+
+/// A typedef for a 3-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t3 FfxInt32x3;
+
+/// A typedef for a 4-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t4 FfxInt32x4;
+
+#else // #if defined(FFXM_HLSL_6_2)
+
+/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types
+/// HLSL 6.1 and below type defines for all commonly used variables
+///
+/// @ingroup HLSLTypes
+
+#define FfxFloat32 float
+#define FfxFloat32x2 float2
+#define FfxFloat32x3 float3
+#define FfxFloat32x4 float4
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+#define FfxFloat32x4x4 float4x4
+#define FfxFloat32x3x3 float3x3
+#define FfxFloat32x2x2 float2x2
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup GPU
+typedef uint FfxUInt32;
+typedef uint2 FfxUInt32x2;
+typedef uint3 FfxUInt32x3;
+typedef uint4 FfxUInt32x4;
+
+typedef int FfxInt32;
+typedef int2 FfxInt32x2;
+typedef int3 FfxInt32x3;
+typedef int4 FfxInt32x4;
+
+#endif // #if defined(FFXM_HLSL_6_2)
+
+#if FFXM_HALF
+
+#if FFXM_HLSL_6_2
+
+typedef float16_t FfxFloat16;
+typedef float16_t2 FfxFloat16x2;
+typedef float16_t3 FfxFloat16x3;
+typedef float16_t4 FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef uint16_t FfxUInt16;
+typedef uint16_t2 FfxUInt16x2;
+typedef uint16_t3 FfxUInt16x3;
+typedef uint16_t4 FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef int16_t FfxInt16;
+typedef int16_t2 FfxInt16x2;
+typedef int16_t3 FfxInt16x3;
+typedef int16_t4 FfxInt16x4;
+#elif SHADER_API_PSSL
+#pragma argument(realtypes) // Enable true 16-bit types
+
+typedef half FfxFloat16;
+typedef half2 FfxFloat16x2;
+typedef half3 FfxFloat16x3;
+typedef half4 FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup GPU
+typedef ushort FfxUInt16;
+typedef ushort2 FfxUInt16x2;
+typedef ushort3 FfxUInt16x3;
+typedef ushort4 FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup GPU
+typedef short FfxInt16;
+typedef short2 FfxInt16x2;
+typedef short3 FfxInt16x3;
+typedef short4 FfxInt16x4;
+#else // #if FFXM_HLSL_6_2
+typedef min16float FfxFloat16;
+typedef min16float2 FfxFloat16x2;
+typedef min16float3 FfxFloat16x3;
+typedef min16float4 FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef min16uint FfxUInt16;
+typedef min16uint2 FfxUInt16x2;
+typedef min16uint3 FfxUInt16x3;
+typedef min16uint4 FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef min16int FfxInt16;
+typedef min16int2 FfxInt16x2;
+typedef min16int3 FfxInt16x3;
+typedef min16int4 FfxInt16x4;
+#endif // #if FFXM_HLSL_6_2
+
+#endif // FFXM_HALF
+
+#endif // #if defined(FFXM_HLSL)
+
+#if defined(FFXM_GLSL)
+
+#define FfxFloat32Mat4 mat4
+#define FfxFloat32Mat3 mat3
+
+/// A typedef for a boolean value.
+///
+/// @ingroup GLSLTypes
+#define FfxBoolean bool
+#define FfxFloat32 float
+#define FfxFloat32x2 vec2
+#define FfxFloat32x3 vec3
+#define FfxFloat32x4 vec4
+#define FfxUInt32 uint
+#define FfxUInt32x2 uvec2
+#define FfxUInt32x3 uvec3
+#define FfxUInt32x4 uvec4
+#define FfxInt32 int
+#define FfxInt32x2 ivec2
+#define FfxInt32x3 ivec3
+#define FfxInt32x4 ivec4
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+#define FfxFloat32x4x4 mat4
+#define FfxFloat32x3x3 mat3
+#define FfxFloat32x2x2 mat2
+
+#if FFXM_HALF
+#define FfxFloat16 float16_t
+#define FfxFloat16x2 f16vec2
+#define FfxFloat16x3 f16vec3
+#define FfxFloat16x4 f16vec4
+#define FfxUInt16 uint16_t
+#define FfxUInt16x2 u16vec2
+#define FfxUInt16x3 u16vec3
+#define FfxUInt16x4 u16vec4
+#define FfxInt16 int16_t
+#define FfxInt16x2 i16vec2
+#define FfxInt16x3 i16vec3
+#define FfxInt16x4 i16vec4
+#endif // FFXM_HALF
+#endif // #if defined(FFXM_GLSL)
+
+
+#if FFXM_HALF && !defined(SHADER_API_PSSL)
+// Each *_VECTOR / *_MATRIX macro must forward its component type and dimensions to vector<> / matrix<>.
+#if FFXM_HLSL_6_2
+// Explicit 16bit component types: BaseComponentType##16_t (float16_t, int16_t, uint16_t).
+#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
+#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName;
+#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#else //FFXM_HLSL_6_2
+// Minimum-precision component types: min16##BaseComponentType (min16float, min16int, min16uint).
+#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName;
+#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<min16##BaseComponentType, COL> TypeName;
+#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
+
+#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) FFXM_MIN16_SCALAR( TypeName, BaseComponentType );
+#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL );
+#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
+
+#endif //FFXM_HLSL_6_2
+
+#else // !(FFXM_HALF && !defined(SHADER_API_PSSL))
+// Fallback: the "16bit" names alias the unmodified (full precision) BaseComponentType.
+#define FFXM_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
+#define FFXM_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
+#define FFXM_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#define FFXM_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName;
+#define FFXM_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType, COL> TypeName;
+#define FFXM_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#endif // FFXM_HALF && !defined(SHADER_API_PSSL)
+
+#if defined(FFXM_GPU)
+// Common typedefs:
+#if defined(FFXM_HLSL) && !defined(SHADER_API_PSSL)
+FFXM_MIN16_SCALAR( FFXM_MIN16_F , float );
+FFXM_MIN16_VECTOR( FFXM_MIN16_F2, float, 2 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_F3, float, 3 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_F4, float, 4 );
+
+FFXM_MIN16_SCALAR( FFXM_MIN16_I, int );
+FFXM_MIN16_VECTOR( FFXM_MIN16_I2, int, 2 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_I3, int, 3 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_I4, int, 4 );
+
+FFXM_MIN16_SCALAR( FFXM_MIN16_U, uint );
+FFXM_MIN16_VECTOR( FFXM_MIN16_U2, uint, 2 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_U3, uint, 3 );
+FFXM_MIN16_VECTOR( FFXM_MIN16_U4, uint, 4 );
+
+FFXM_16BIT_SCALAR( FFXM_F16_t , float );
+FFXM_16BIT_VECTOR( FFXM_F16_t2, float, 2 );
+FFXM_16BIT_VECTOR( FFXM_F16_t3, float, 3 );
+FFXM_16BIT_VECTOR( FFXM_F16_t4, float, 4 );
+
+FFXM_16BIT_SCALAR( FFXM_I16_t, int );
+FFXM_16BIT_VECTOR( FFXM_I16_t2, int, 2 );
+FFXM_16BIT_VECTOR( FFXM_I16_t3, int, 3 );
+FFXM_16BIT_VECTOR( FFXM_I16_t4, int, 4 );
+
+FFXM_16BIT_SCALAR( FFXM_U16_t, uint );
+FFXM_16BIT_VECTOR( FFXM_U16_t2, uint, 2 );
+FFXM_16BIT_VECTOR( FFXM_U16_t3, uint, 3 );
+FFXM_16BIT_VECTOR( FFXM_U16_t4, uint, 4 );
+
+#define TYPEDEF_MIN16_TYPES(Prefix) \
+typedef FFXM_MIN16_F Prefix##_F; \
+typedef FFXM_MIN16_F2 Prefix##_F2; \
+typedef FFXM_MIN16_F3 Prefix##_F3; \
+typedef FFXM_MIN16_F4 Prefix##_F4; \
+typedef FFXM_MIN16_I Prefix##_I; \
+typedef FFXM_MIN16_I2 Prefix##_I2; \
+typedef FFXM_MIN16_I3 Prefix##_I3; \
+typedef FFXM_MIN16_I4 Prefix##_I4; \
+typedef FFXM_MIN16_U Prefix##_U; \
+typedef FFXM_MIN16_U2 Prefix##_U2; \
+typedef FFXM_MIN16_U3 Prefix##_U3; \
+typedef FFXM_MIN16_U4 Prefix##_U4;
+// Declares Prefix##_F .. Prefix##_U4 as aliases of the FFXM_F16_t / FFXM_I16_t / FFXM_U16_t typedefs created above.
+#define TYPEDEF_16BIT_TYPES(Prefix) \
+typedef FFXM_F16_t Prefix##_F; \
+typedef FFXM_F16_t2 Prefix##_F2; \
+typedef FFXM_F16_t3 Prefix##_F3; \
+typedef FFXM_F16_t4 Prefix##_F4; \
+typedef FFXM_I16_t Prefix##_I; \
+typedef FFXM_I16_t2 Prefix##_I2; \
+typedef FFXM_I16_t3 Prefix##_I3; \
+typedef FFXM_I16_t4 Prefix##_I4; \
+typedef FFXM_U16_t Prefix##_U; \
+typedef FFXM_U16_t2 Prefix##_U2; \
+typedef FFXM_U16_t3 Prefix##_U3; \
+typedef FFXM_U16_t4 Prefix##_U4;
+
+#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \
+typedef FfxFloat32 Prefix##_F; \
+typedef FfxFloat32x2 Prefix##_F2; \
+typedef FfxFloat32x3 Prefix##_F3; \
+typedef FfxFloat32x4 Prefix##_F4; \
+typedef FfxInt32 Prefix##_I; \
+typedef FfxInt32x2 Prefix##_I2; \
+typedef FfxInt32x3 Prefix##_I3; \
+typedef FfxInt32x4 Prefix##_I4; \
+typedef FfxUInt32 Prefix##_U; \
+typedef FfxUInt32x2 Prefix##_U2; \
+typedef FfxUInt32x3 Prefix##_U3; \
+typedef FfxUInt32x4 Prefix##_U4;
+#endif // #if defined(FFXM_HLSL)
+
+#if defined(SHADER_API_PSSL)
+
+#if FFXM_HALF
+
+#define FFXM_MIN16_F half
+#define FFXM_MIN16_F2 half2
+#define FFXM_MIN16_F3 half3
+#define FFXM_MIN16_F4 half4
+
+#define FFXM_MIN16_I short
+#define FFXM_MIN16_I2 short2
+#define FFXM_MIN16_I3 short3
+#define FFXM_MIN16_I4 short4
+
+#define FFXM_MIN16_U ushort
+#define FFXM_MIN16_U2 ushort2
+#define FFXM_MIN16_U3 ushort3
+#define FFXM_MIN16_U4 ushort4
+
+#define FFXM_16BIT_F half
+#define FFXM_16BIT_F2 half2
+#define FFXM_16BIT_F3 half3
+#define FFXM_16BIT_F4 half4
+
+#define FFXM_16BIT_I short
+#define FFXM_16BIT_I2 short2
+#define FFXM_16BIT_I3 short3
+#define FFXM_16BIT_I4 short4
+
+#define FFXM_16BIT_U ushort
+#define FFXM_16BIT_U2 ushort2
+#define FFXM_16BIT_U3 ushort3
+#define FFXM_16BIT_U4 ushort4
+
+#else // FFXM_HALF
+
+#define FFXM_MIN16_F float
+#define FFXM_MIN16_F2 float2
+#define FFXM_MIN16_F3 float3
+#define FFXM_MIN16_F4 float4
+
+#define FFXM_MIN16_I int
+#define FFXM_MIN16_I2 int2
+#define FFXM_MIN16_I3 int3
+#define FFXM_MIN16_I4 int4
+
+#define FFXM_MIN16_U uint
+#define FFXM_MIN16_U2 uint2
+#define FFXM_MIN16_U3 uint3
+#define FFXM_MIN16_U4 uint4
+
+#define FFXM_16BIT_F float
+#define FFXM_16BIT_F2 float2
+#define FFXM_16BIT_F3 float3
+#define FFXM_16BIT_F4 float4
+
+#define FFXM_16BIT_I int
+#define FFXM_16BIT_I2 int2
+#define FFXM_16BIT_I3 int3
+#define FFXM_16BIT_I4 int4
+
+#define FFXM_16BIT_U uint
+#define FFXM_16BIT_U2 uint2
+#define FFXM_16BIT_U3 uint3
+#define FFXM_16BIT_U4 uint4
+
+#endif // FFXM_HALF
+
+#endif // #if defined(SHADER_API_PSSL)
+
+#if defined(FFXM_GLSL)
+
+#if FFXM_HALF
+
+#define FFXM_MIN16_F float16_t
+#define FFXM_MIN16_F2 f16vec2
+#define FFXM_MIN16_F3 f16vec3
+#define FFXM_MIN16_F4 f16vec4
+
+#define FFXM_MIN16_I int16_t
+#define FFXM_MIN16_I2 i16vec2
+#define FFXM_MIN16_I3 i16vec3
+#define FFXM_MIN16_I4 i16vec4
+
+#define FFXM_MIN16_U uint16_t
+#define FFXM_MIN16_U2 u16vec2
+#define FFXM_MIN16_U3 u16vec3
+#define FFXM_MIN16_U4 u16vec4
+
+#define FFXM_16BIT_F float16_t
+#define FFXM_16BIT_F2 f16vec2
+#define FFXM_16BIT_F3 f16vec3
+#define FFXM_16BIT_F4 f16vec4
+
+#define FFXM_16BIT_I int16_t
+#define FFXM_16BIT_I2 i16vec2
+#define FFXM_16BIT_I3 i16vec3
+#define FFXM_16BIT_I4 i16vec4
+
+#define FFXM_16BIT_U uint16_t
+#define FFXM_16BIT_U2 u16vec2
+#define FFXM_16BIT_U3 u16vec3
+#define FFXM_16BIT_U4 u16vec4
+
+#else // FFXM_HALF
+
+#define FFXM_MIN16_F float
+#define FFXM_MIN16_F2 vec2
+#define FFXM_MIN16_F3 vec3
+#define FFXM_MIN16_F4 vec4
+
+#define FFXM_MIN16_I int
+#define FFXM_MIN16_I2 ivec2
+#define FFXM_MIN16_I3 ivec3
+#define FFXM_MIN16_I4 ivec4
+
+#define FFXM_MIN16_U uint
+#define FFXM_MIN16_U2 uvec2
+#define FFXM_MIN16_U3 uvec3
+#define FFXM_MIN16_U4 uvec4
+
+#define FFXM_16BIT_F float
+#define FFXM_16BIT_F2 vec2
+#define FFXM_16BIT_F3 vec3
+#define FFXM_16BIT_F4 vec4
+
+#define FFXM_16BIT_I int
+#define FFXM_16BIT_I2 ivec2
+#define FFXM_16BIT_I3 ivec3
+#define FFXM_16BIT_I4 ivec4
+
+#define FFXM_16BIT_U uint
+#define FFXM_16BIT_U2 uvec2
+#define FFXM_16BIT_U3 uvec3
+#define FFXM_16BIT_U4 uvec4
+
+#endif // FFXM_HALF
+
+#endif // #if defined(FFXM_GLSL)
+
+#endif // #if defined(FFXM_GPU)
+#endif // #ifndef FFXM_COMMON_TYPES_H
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta
new file mode 100644
index 0000000..12003b2
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_common_types.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 86143f20804e7ad40af9d5e4bb7038f6
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h
new file mode 100644
index 0000000..ee924e4
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h
@@ -0,0 +1,69 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// @defgroup FfxGPU GPU
+/// The FidelityFX SDK GPU References
+///
+/// @ingroup ffxSDK
+
+/// @defgroup FfxHLSL HLSL References
+/// FidelityFX SDK HLSL GPU References
+///
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGLSL GLSL References
+/// FidelityFX SDK GLSL GPU References
+///
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGPUEffects FidelityFX GPU References
+/// FidelityFX Effect GPU Reference Documentation
+///
+/// @ingroup FfxGPU
+
+/// @defgroup GPUCore GPU Core
+/// GPU defines and functions
+///
+/// @ingroup FfxGPU
+
+#if !defined(FFXM_CORE_H)
+#define FFXM_CORE_H
+
+#include "ffxm_common_types.h"
+
+#if defined(FFXM_CPU)
+#include "ffxm_core_cpu.h"
+#endif // #if defined(FFXM_CPU)
+
+#if defined(FFXM_GLSL) && defined(FFXM_GPU)
+#include "ffxm_core_glsl.h"
+#endif // #if defined(FFXM_GLSL) && defined(FFXM_GPU)
+
+#if defined(FFXM_HLSL) && defined(FFXM_GPU)
+#include "ffxm_core_hlsl.h"
+#endif // #if defined(FFXM_HLSL) && defined(FFXM_GPU)
+
+#if defined(FFXM_GPU)
+#include "ffxm_core_gpu_common.h"
+#include "ffxm_core_gpu_common_half.h"
+#include "ffxm_core_portability.h"
+#endif // #if defined(FFXM_GPU)
+#endif // #if !defined(FFXM_CORE_H)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta
new file mode 100644
index 0000000..90bce22
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 1de7e5f01f4c625458dbda94917d9aa1
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h
new file mode 100644
index 0000000..e32dbd3
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h
@@ -0,0 +1,337 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// A define for a true value in a boolean expression.
+///
+/// @ingroup CPUTypes
+#define FFXM_TRUE (1)
+
+/// A define for a false value in a boolean expression.
+///
+/// @ingroup CPUTypes
+#define FFXM_FALSE (0)
+
+#if !defined(FFXM_STATIC)
+/// A define to abstract declaration of static variables and functions.
+///
+/// @ingroup CPUTypes
+#define FFXM_STATIC static
+#endif // #if !defined(FFXM_STATIC)
+
+/// @defgroup CPUCore CPU Core
+/// Core CPU-side defines and functions
+///
+/// @ingroup ffxHost
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+
+/// Reinterpret the raw bits of a 32bit floating point value as an unsigned integer.
+///
+/// @param [in] x The 32bit floating point value whose bits are read.
+///
+/// @returns
+/// The bit pattern of x, unchanged, viewed as an unsigned 32bit integer.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    // Store through the float member, then read back through the integer member.
+    union
+    {
+        FfxFloat32 asFloat;
+        FfxUInt32  asUInt;
+    } pun;
+    pun.asFloat = x;
+    return pun.asUInt;
+}
+
+FFXM_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) // Dot product of the 2-component arrays a and b.
+{
+ return a[0] * b[0] + a[1] * b[1];
+}
+
+FFXM_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) // Dot product of the 3-component arrays a and b.
+{
+ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+FFXM_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) // Dot product of the 4-component arrays a and b.
+{
+ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Evaluated as y * t + (-x * t + x), which is algebraically equal to the
+/// following math:
+///
+/// (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y; it is not clamped here.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+ return y * t + (-x * t + x);
+}
+
+/// Compute the reciprocal of a value.
+///
+/// @param [in] x The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of x, i.e. 1 / x.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x)
+{
+ return 1.0f / x; // No guard against x == 0; callers must pass a usable divisor.
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+ return sqrt(x);
+}
+
+FFXM_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) // Right shift of a by b, carried out on signed 32bit values.
+{
+ return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); // Both operands are converted to signed first; the result is converted back to unsigned.
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates x - floor(x).
+///
+/// @param [in] x The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 ffxFract(FfxFloat32 x)
+{
+ return x - floor(x);
+}
+
+/// Compute one over the square root of a value.
+///
+/// @param [in] x The value to compute the reciprocal square root for.
+///
+/// @returns The reciprocal of the square root of x.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 rsqrt(FfxFloat32 x)
+{
+    const FfxFloat32 root = ffxSqrt(x);
+    return ffxReciprocal(root);
+}
+
+FFXM_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) // Smaller of the two 32bit float values.
+{
+    return (y > x) ? x : y; // y is returned whenever the comparison is false, ties included.
+}
+
+FFXM_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) // Smaller of the two unsigned 32bit values.
+{
+    return (y > x) ? x : y; // y is returned whenever the comparison is false, ties included.
+}
+
+FFXM_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) // Larger of the two 32bit float values.
+{
+    return (y < x) ? x : y; // y is returned whenever the comparison is false, ties included.
+}
+
+FFXM_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) // Larger of the two unsigned 32bit values.
+{
+    return (y < x) ? x : y; // y is returned whenever the comparison is false, ties included.
+}
+
+/// Limit a value to the [0..1] range.
+///
+/// @param [in] x The value to limit to the [0..1] range.
+///
+/// @returns The result of ffxMin(1, ffxMax(0, x)).
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    const FfxFloat32 atLeastZero = ffxMax(0.0f, x);
+    return ffxMin(1.0f, atLeastZero);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FFXM_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // d[i] = a[i] + b for the three components.
+{
+    for (int lane = 0; lane < 3; ++lane)
+    {
+        d[lane] = a[lane] + b;
+    }
+}
+
+FFXM_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) // d[i] = a[i] for the three components.
+{
+    for (int lane = 0; lane < 3; ++lane)
+    {
+        d[lane] = a[lane];
+    }
+}
+
+FFXM_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) // d[i] = a[i] * b[i] for the three components.
+{
+    for (int lane = 0; lane < 3; ++lane)
+    {
+        d[lane] = a[lane] * b[lane];
+    }
+}
+
+FFXM_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // d[i] = a[i] * b for the three components.
+{
+    for (int lane = 0; lane < 3; ++lane)
+    {
+        d[lane] = a[lane] * b;
+    }
+}
+
+FFXM_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) // d[i] = ffxReciprocal(a[i]) for the three components.
+{
+    for (int lane = 0; lane < 3; ++lane)
+    {
+        d[lane] = ffxReciprocal(a[lane]);
+    }
+}
+
+/// Convert FfxFloat32 to half (in lower 16-bits of output).
+///
+/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+///
+/// The function supports denormals.
+///
+/// Some conversion rules are to make computations possibly "safer" on the GPU,
+/// -INF & -NaN -> -65504
+/// +INF & +NaN -> +65504
+///
+/// @param [in] f The 32bit floating point value to convert.
+///
+/// @returns
+/// The closest 16bit floating point value to f.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
+{
+ static FfxUInt16 base[512] = {
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
+ 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
+ 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+ 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
+ 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+ 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+ 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
+ };
+
+ static FfxUInt8 shift[512] = {
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+ 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ };
+
+ union
+ {
+ FfxFloat32 f;
+ FfxUInt32 u;
+ } bits;
+
+ bits.f = f;
+ FfxUInt32 u = bits.u;
+ FfxUInt32 i = u >> 23;
+ return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
+}
+
+/// Pack a pair of 32-bit floats into one 32-bit word as two 16-bit floats.
+///
+/// Each component is converted to a 16-bit floating point bit pattern with f32tof16.
+/// The converted X component is added into the low 16 bits of the result and the
+/// converted Y component is shifted into the high 16 bits.
+///
+/// @param [in] x The 2-component floating point value to convert and pack.
+///
+/// @returns
+/// A 32-bit unsigned integer holding both 16-bit floating point values.
+///
+/// @ingroup CPUCore
+FFXM_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 x)
+{
+ const FfxUInt32 lowHalf = f32tof16(x[0]);
+ const FfxUInt32 highHalf = f32tof16(x[1]) << 16;
+ return lowHalf + highHalf;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta
new file mode 100644
index 0000000..b8f442f
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_cpu.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 94fb3b7a7fde2f7448c52c5c262f5c01
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h
new file mode 100644
index 0000000..e8df503
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h
@@ -0,0 +1,2812 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// Shader quality related defines
+///
+/// FFXM_FSR2_OPTION_SHADER_OPT_BALANCED. When defined to a non-zero value, optimizations related to the balanced preset are enabled.
+/// FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE. When defined to a non-zero value, optimizations related to the performance preset are enabled.
+/// Both options default to 0 (disabled) when they are not defined before this point.
+#ifndef FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+#define FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE 0
+#endif
+#ifndef FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+#define FFXM_FSR2_OPTION_SHADER_OPT_BALANCED 0
+#endif
+/// FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE. Helper that is non-zero when either the balanced or the performance profile is enabled.
+#define FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE (FFXM_FSR2_OPTION_SHADER_OPT_BALANCED || FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE)
+
+/// Both Balanced/Performance. Keep the temporal reactive as a separate RT to improve bandwidth of color history buffer.
+#define FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+/// Both Balanced/Performance. Disable deringing when doing the color reprojection with the history.
+#define FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+/// Both Balanced/Performance. Disable the luma stability factor.
+#define FFXM_SHADER_QUALITY_OPT_DISABLE_LUMA_INSTABILITY FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+/// Both Balanced/Performance. Use a 5-tap Lanczos kernel instead of the 9-tap used for `Quality` for upsampling.
+#define FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+/// Balanced. Use Catmull-Rom (9 samples) for history reprojection.
+#define FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_9TAP FFXM_FSR2_OPTION_SHADER_OPT_BALANCED
+/// Performance. PreparedInputColor is stored as R8G8B8A8_Unorm tonemapped data. Rectification no longer uses `YCoCg`.
+#define FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+/// Performance. Use Catmull-Rom (5 samples) for history reprojection.
+#define FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_5TAP FFXM_FSR2_OPTION_SHADER_OPT_PERFORMANCE
+
+/// Platform switch for GLES 3.2; defaults to 0 when the including code has not defined it.
+#if !defined(FFXM_SHADER_PLATFORM_GLES_3_2)
+#define FFXM_SHADER_PLATFORM_GLES_3_2 (0)
+#endif
+
+/// A define for a true value in a boolean expression.
+///
+/// @ingroup GPUCore
+#define FFXM_TRUE (true)
+
+/// A define for a false value in a boolean expression.
+///
+/// @ingroup GPUCore
+#define FFXM_FALSE (false)
+
+/// A define value for positive infinity.
+///
+/// 0x7f800000 is the IEEE-754 single-precision bit pattern for +infinity, reinterpreted through ffxAsFloat.
+///
+/// @ingroup GPUCore
+#define FFXM_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u)
+
+/// A define value for negative infinity.
+///
+/// 0xff800000 is the IEEE-754 single-precision bit pattern for -infinity, reinterpreted through ffxAsFloat.
+///
+/// @ingroup GPUCore
+#define FFXM_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u)
+
+/// A define value for PI.
+///
+/// NOTE(review): only six significant digits are given, which is less than 32-bit float precision — confirm this is intentional.
+///
+/// @ingroup GPUCore
+#define FFXM_PI (3.14159)
+
+
+/// Compute the reciprocal of value.
+///
+/// Thin wrapper over the rcp intrinsic.
+///
+/// @param [in] value The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of value (1 / value).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxReciprocal(FfxFloat32 value)
+{
+ return rcp(value);
+}
+
+/// Compute the reciprocal of value.
+///
+/// Thin wrapper over the rcp intrinsic.
+///
+/// @param [in] value The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of value (1 / value), computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value)
+{
+ return rcp(value);
+}
+
+/// Compute the reciprocal of value.
+///
+/// Thin wrapper over the rcp intrinsic.
+///
+/// @param [in] value The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of value (1 / value), computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value)
+{
+ return rcp(value);
+}
+
+/// Compute the reciprocal of value.
+///
+/// Thin wrapper over the rcp intrinsic.
+///
+/// @param [in] value The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of value (1 / value), computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
+{
+ return rcp(value);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x The first value to compute the min of.
+/// @param [in] y The second value to compute the min of.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x The first value to compute the max of.
+/// @param [in] y The second value to compute the max of.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// NOTE(review): thin wrapper over the pow intrinsic; HLSL documents a NaN result for x < 0 — confirm callers only pass non-negative x.
+///
+/// @param [in] x The value to raise to the power y.
+/// @param [in] y The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
+{
+ return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// NOTE(review): thin wrapper over the pow intrinsic; HLSL documents a NaN result for x < 0 — confirm callers only pass non-negative x.
+///
+/// @param [in] x The value to raise to the power y.
+/// @param [in] y The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+ return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// NOTE(review): thin wrapper over the pow intrinsic; HLSL documents a NaN result for x < 0 — confirm callers only pass non-negative x.
+///
+/// @param [in] x The value to raise to the power y.
+/// @param [in] y The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+ return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// NOTE(review): thin wrapper over the pow intrinsic; HLSL documents a NaN result for x < 0 — confirm callers only pass non-negative x.
+///
+/// @param [in] x The value to raise to the power y.
+/// @param [in] y The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+ return pow(x, y);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, computed per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
+{
+ return sqrt(x);
+}
+
+/// Transfer the sign bit of 's' onto 'd'.
+///
+/// The sign bit of 's' is OR-ed into the bit pattern of 'd', so 'd' is expected to be
+/// positive: a sign bit already set in 'd' is never cleared.
+///
+/// @param [in] d The (positive) value that receives the sign bit.
+/// @param [in] s The value that supplies the sign bit.
+///
+/// @returns
+/// The value of d carrying the sign bit of s.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
+{
+ // Isolate bit 31 of s, then merge it into the raw bits of d.
+ FfxUInt32 signBit = ffxAsUInt32(s) & FfxUInt32(0x80000000u);
+ return ffxAsFloat(ffxAsUInt32(d) | signBit);
+}
+
+/// Transfer the sign bit of each component of 's' onto the matching component of 'd'.
+///
+/// The sign bits of 's' are OR-ed into the bit pattern of 'd', so 'd' is expected to be
+/// positive: a sign bit already set in 'd' is never cleared.
+///
+/// @param [in] d The (positive) value that receives the sign bits.
+/// @param [in] s The value that supplies the sign bits.
+///
+/// @returns
+/// The value of d carrying the sign bits of s.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
+{
+ // Isolate bit 31 of every component of s, then merge it into the raw bits of d.
+ FfxUInt32x2 signBits = ffxAsUInt32(s) & ffxBroadcast2(0x80000000u);
+ return ffxAsFloat(ffxAsUInt32(d) | signBits);
+}
+
+/// Transfer the sign bit of each component of 's' onto the matching component of 'd'.
+///
+/// The sign bits of 's' are OR-ed into the bit pattern of 'd', so 'd' is expected to be
+/// positive: a sign bit already set in 'd' is never cleared.
+///
+/// @param [in] d The (positive) value that receives the sign bits.
+/// @param [in] s The value that supplies the sign bits.
+///
+/// @returns
+/// The value of d carrying the sign bits of s.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
+{
+ // Isolate bit 31 of every component of s, then merge it into the raw bits of d.
+ FfxUInt32x3 signBits = ffxAsUInt32(s) & ffxBroadcast3(0x80000000u);
+ return ffxAsFloat(ffxAsUInt32(d) | signBits);
+}
+
+/// Transfer the sign bit of each component of 's' onto the matching component of 'd'.
+///
+/// The sign bits of 's' are OR-ed into the bit pattern of 'd', so 'd' is expected to be
+/// positive: a sign bit already set in 'd' is never cleared.
+///
+/// @param [in] d The (positive) value that receives the sign bits.
+/// @param [in] s The value that supplies the sign bits.
+///
+/// @returns
+/// The value of d carrying the sign bits of s.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
+{
+ // Isolate bit 31 of every component of s, then merge it into the raw bits of d.
+ FfxUInt32x4 signBits = ffxAsUInt32(s) & ffxBroadcast4(0x80000000u);
+ return ffxAsFloat(ffxAsUInt32(d) | signBits);
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxIsSigned(FfxFloat32 m)
+{
+ return ffxSaturate(m * FfxFloat32(FFXM_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// Per component, 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
+{
+ return ffxSaturate(m * ffxBroadcast2(FFXM_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// Per component, 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
+{
+ return ffxSaturate(m * ffxBroadcast3(FFXM_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// Per component, 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
+{
+ return ffxSaturate(m * ffxBroadcast4(FFXM_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// Uses the following floating point logic (assuming ffxSaturate maps NaN to 0 — TODO confirm),
+/// saturate(+a*(+INF)==+INF) := 1
+/// saturate( 0*(+INF)== NaN) := 0
+/// saturate(-a*(+INF)==-INF) := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
+{
+ return ffxSaturate(m * FfxFloat32(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// Uses the following floating point logic (assuming ffxSaturate maps NaN to 0 — TODO confirm),
+/// saturate(+a*(+INF)==+INF) := 1
+/// saturate( 0*(+INF)== NaN) := 0
+/// saturate(-a*(+INF)==-INF) := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// Per component, 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
+{
+ return ffxSaturate(m * ffxBroadcast2(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// Uses the following floating point logic (assuming ffxSaturate maps NaN to 0 — TODO confirm),
+/// saturate(+a*(+INF)==+INF) := 1
+/// saturate( 0*(+INF)== NaN) := 0
+/// saturate(-a*(+INF)==-INF) := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// Per component, 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
+{
+ return ffxSaturate(m * ffxBroadcast3(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// Uses the following floating point logic (assuming ffxSaturate maps NaN to 0 — TODO confirm),
+/// saturate(+a*(+INF)==+INF) := 1
+/// saturate( 0*(+INF)== NaN) := 0
+/// saturate(-a*(+INF)==-INF) := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// Per component, 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
+{
+ return ffxSaturate(m * ffxBroadcast4(FFXM_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Map the bit pattern of a 32bit floating point value to an integer whose unsigned order matches the float order.
+///
+/// Assuming AShrSU1 replicates the sign bit across the word (arithmetic shift right):
+/// - If sign bit=0, only the sign bit is flipped (positives).
+/// - If sign bit=1, all bits are flipped (negatives).
+///
+/// Consequences of this mapping:
+/// - Larger integers correspond to more positive values.
+/// - Float zero is mapped to the center of the integer range (so clearing to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] value The bit pattern of the floating point value to make sortable.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
+{
+ // Sign bit smeared over the whole word, with bit 31 always forced on.
+ FfxUInt32 signFill = AShrSU1(value, FfxUInt32(31));
+ FfxUInt32 flipMask = signFill | FfxUInt32(0x80000000);
+ return value ^ flipMask;
+}
+
+/// Convert a sortable integer back to the bit pattern of a 32bit floating point value.
+///
+/// Assuming AShrSU1 performs an arithmetic shift right, this undoes ffxFloatToSortableInteger:
+/// - If sign bit=1, flip the sign bit (positives).
+/// - If sign bit=0, flip all bits (negatives).
+///
+/// @param [in] value The sortable integer value to convert back.
+///
+/// @returns
+/// The bit pattern of the corresponding 32bit floating point value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
+{
+ return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+}
+
+/// Estimate the square root of a value with a cheap, low-quality bit manipulation.
+///
+/// The float's bit pattern is shifted right by one and offset by a magic constant,
+/// then reinterpreted as a float.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to approximate the square root of.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateSqrt(FfxFloat32 value)
+{
+ FfxUInt32 halvedBits = ffxAsUInt32(value) >> FfxUInt32(1);
+ return ffxAsFloat(halvedBits + FfxUInt32(0x1fbc4639));
+}
+
+/// Estimate the reciprocal of a value with a cheap, low-quality bit manipulation.
+///
+/// The float's bit pattern is subtracted from a magic constant and the result is
+/// reinterpreted as a float.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to approximate the reciprocal of.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value)
+{
+ FfxUInt32 valueBits = ffxAsUInt32(value);
+ return ffxAsFloat(FfxUInt32(0x7ef07ebb) - valueBits);
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
+{
+ FfxFloat32 estimate = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); // coarse first guess from the integer-domain subtraction
+ return estimate * (-estimate * value + FfxFloat32(2.0)); // refine once: estimate * (2 - estimate * value)
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value)
+{
+ return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); // magic constant minus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value)
+{
+ return ffxAsFloat(ffxBroadcast2(0x1fbc4639u) + (ffxAsUInt32(value) >> ffxBroadcast2(1u))); // magic bias plus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value)
+{
+ return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); // magic constant minus the input's integer form, converted back with ffxAsFloat
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value)
+{
+ FfxFloat32x2 estimate = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); // coarse first guess from the integer-domain subtraction
+ return estimate * (-estimate * value + ffxBroadcast2(2.0f)); // refine once: estimate * (2 - estimate * value)
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value)
+{
+ return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); // magic constant minus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value)
+{
+ return ffxAsFloat(ffxBroadcast3(0x1fbc4639u) + (ffxAsUInt32(value) >> ffxBroadcast3(1u))); // magic bias plus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value)
+{
+ return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); // magic constant minus the input's integer form, converted back with ffxAsFloat
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value)
+{
+ FfxFloat32x3 estimate = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); // coarse first guess from the integer-domain subtraction
+ return estimate * (-estimate * value + ffxBroadcast3(2.0f)); // refine once: estimate * (2 - estimate * value)
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value)
+{
+ return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); // magic constant minus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value)
+{
+ return ffxAsFloat(ffxBroadcast4(0x1fbc4639u) + (ffxAsUInt32(value) >> ffxBroadcast4(1u))); // magic bias plus the input's integer form shifted right by one
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value)
+{
+ return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); // magic constant minus the input's integer form, converted back with ffxAsFloat
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value)
+{
+ FfxFloat32x4 estimate = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); // coarse first guess from the integer-domain subtraction
+ return estimate * (-estimate * value + ffxBroadcast4(2.0f)); // refine once: estimate * (2 - estimate * value)
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value)
+{
+ return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); // magic constant minus the input's integer form shifted right by one
+}
+
+/// Calculate dot product of 'a' and 'b'.
+///
+/// @param [in] a First vector input.
+/// @param [in] b Second vector input.
+///
+/// @returns
+/// The value of a dot b.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
+{
+ return dot(a, b); // forwards both arguments unchanged to dot()
+}
+
+/// Calculate dot product of 'a' and 'b'.
+///
+/// @param [in] a First vector input.
+/// @param [in] b Second vector input.
+///
+/// @returns
+/// The value of a dot b.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
+{
+ return dot(a, b); // forwards both arguments unchanged to dot()
+}
+
+/// Calculate dot product of 'a' and 'b'.
+///
+/// @param [in] a First vector input.
+/// @param [in] b Second vector input.
+///
+/// @returns
+/// The value of a dot b.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
+{
+ return dot(a, b); // forwards both arguments unchanged to dot()
+}
+
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
+{
+ return a * a * a * a; // a raised to the fourth power
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
+{
+ return a * a * a * a * a * a * a * a; // a raised to the eighth power
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
+{
+ return ffxAsFloat(FfxUInt32(0x2F9A4E46) + (ffxAsUInt32(a) >> FfxUInt32(2))); // magic bias plus the input's integer form shifted right by two
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
+{
+ FfxFloat32 root = ffxAsFloat(FfxUInt32(0x2F9A4E46) + (ffxAsUInt32(a) >> FfxUInt32(2))); // first guess: magic bias plus the input's integer form shifted right by two
+ FfxFloat32 rootPow4 = root * root * root * root; // the guess raised back to the fourth power
+ return root - root * (rootPow4 - a) / (FfxFloat32(4.0) * rootPow4); // one correction step toward root^4 == a
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
+{
+ return ffxSqrt(ffxSqrt(a)); // ffxSqrt applied twice
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
+{
+ return ffxAsFloat(FfxUInt32(0x378D8723) + (ffxAsUInt32(a) >> FfxUInt32(3))); // magic bias plus the input's integer form shifted right by three
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
+{
+ FfxFloat32 root = ffxAsFloat(FfxUInt32(0x378D8723) + (ffxAsUInt32(a) >> FfxUInt32(3))); // first guess: magic bias plus the input's integer form shifted right by three
+ FfxFloat32 rootPow8 = root * root * root * root * root * root * root * root; // the guess raised back to the eighth power
+ return root - root * (rootPow8 - a) / (FfxFloat32(8.0) * rootPow8); // one correction step toward root^8 == a
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
+{
+ return ffxSqrt(ffxSqrt(ffxSqrt(a))); // ffxSqrt applied three times
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
+{
+ return a * a * a * a; // a raised to the fourth power
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
+{
+ return a * a * a * a * a * a * a * a; // a raised to the eighth power
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
+{
+ return ffxAsFloat(ffxBroadcast2(0x2F9A4E46u) + (ffxAsUInt32(a) >> ffxBroadcast2(2u))); // magic bias plus the input's integer form shifted right by two
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
+{
+ FfxFloat32x2 root = ffxAsFloat(ffxBroadcast2(0x2F9A4E46u) + (ffxAsUInt32(a) >> ffxBroadcast2(2u))); // first guess: magic bias plus the input's integer form shifted right by two
+ FfxFloat32x2 rootPow4 = root * root * root * root; // the guess raised back to the fourth power
+ return root - root * (rootPow4 - a) / (FfxFloat32(4.0) * rootPow4); // one correction step toward root^4 == a
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
+{
+ return ffxSqrt(ffxSqrt(a)); // ffxSqrt applied twice
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
+{
+ return ffxAsFloat(ffxBroadcast2(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast2(3u))); // magic bias plus the input's integer form shifted right by three
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
+{
+ FfxFloat32x2 root = ffxAsFloat(ffxBroadcast2(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast2(3u))); // first guess: magic bias plus the input's integer form shifted right by three
+ FfxFloat32x2 rootPow8 = root * root * root * root * root * root * root * root; // the guess raised back to the eighth power
+ return root - root * (rootPow8 - a) / (FfxFloat32(8.0) * rootPow8); // one correction step toward root^8 == a
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
+{
+ return ffxSqrt(ffxSqrt(ffxSqrt(a))); // ffxSqrt applied three times
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
+{
+ return a * a * a * a; // a raised to the fourth power
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
+{
+ return a * a * a * a * a * a * a * a; // a raised to the eighth power
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
+{
+ return ffxAsFloat(ffxBroadcast3(0x2F9A4E46u) + (ffxAsUInt32(a) >> ffxBroadcast3(2u))); // magic bias plus the input's integer form shifted right by two
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
+{
+ FfxFloat32x3 root = ffxAsFloat(ffxBroadcast3(0x2F9A4E46u) + (ffxAsUInt32(a) >> ffxBroadcast3(2u))); // first guess: magic bias plus the input's integer form shifted right by two
+ FfxFloat32x3 rootPow4 = root * root * root * root; // the guess raised back to the fourth power
+ return root - root * (rootPow4 - a) / (FfxFloat32(4.0) * rootPow4); // one correction step toward root^4 == a
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
+{
+ return ffxSqrt(ffxSqrt(a)); // ffxSqrt applied twice
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
+{
+ return ffxAsFloat(ffxBroadcast3(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast3(3u))); // magic bias plus the input's integer form shifted right by three
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
+{
+ FfxFloat32x3 root = ffxAsFloat(ffxBroadcast3(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast3(3u))); // first guess: magic bias plus the input's integer form shifted right by three
+ FfxFloat32x3 rootPow8 = root * root * root * root * root * root * root * root; // the guess raised back to the eighth power
+ return root - root * (rootPow8 - a) / (FfxFloat32(8.0) * rootPow8); // one correction step toward root^8 == a
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
+{
+ return ffxSqrt(ffxSqrt(ffxSqrt(a))); // ffxSqrt applied three times
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and Gamma2.
+///
+/// @returns
+/// The value a converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
+{
+ return a * a * a * a; // a raised to the fourth power
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between PQ and linear.
+///
+/// @returns
+/// The value a converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
+{
+ return a * a * a * a * a * a * a * a; // a raised to the eighth power
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
+{
+ return ffxAsFloat(ffxBroadcast4(0x2F9A4E46u) + (ffxAsUInt32(a) >> ffxBroadcast4(2u))); // magic bias plus the input's integer form shifted right by two
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
+{
+ FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); // first guess: the input's integer form shifted right by two, plus the magic bias
+ FfxFloat32x4 b4 = b * b * b * b; // fourth power of the guess; previously eight factors (b^8), which did not match the 4.0 * b4 term below
+ return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); // one Newton-Raphson step for b^4 == a, i.e. b - (b^4 - a) / (4 * b^3)
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between gamma2 and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
+{
+ return ffxSqrt(ffxSqrt(a)); // ffxSqrt applied twice
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
+{
+ return ffxAsFloat(ffxBroadcast4(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast4(3u))); // magic bias plus the input's integer form shifted right by three
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The value to convert between linear and PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
+{
+ FfxFloat32x4 root = ffxAsFloat(ffxBroadcast4(0x378D8723u) + (ffxAsUInt32(a) >> ffxBroadcast4(3u))); // first guess: magic bias plus the input's integer form shifted right by three
+ FfxFloat32x4 rootPow8 = root * root * root * root * root * root * root * root; // the guess raised back to the eighth power
+ return root - root * (rootPow8 - a) / (FfxFloat32(8.0) * rootPow8); // one correction step toward root^8 == a
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). The functions below use the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%.
+///
+/// @param a The linear value to convert to PQ.
+///
+/// @returns
+/// The value a converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
+{
+ return ffxSqrt(ffxSqrt(ffxSqrt(a))); // ffxSqrt applied three times: the eighth root, assuming ffxSqrt is a plain square root
+}
+
+// A parabolic approximation of sine, evaluated as value * |value| - value.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] value The value to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of value.
+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
+{
+ return value * abs(value) - value; // abs() keeps the parabola odd-symmetric around zero
+}
+
+// A parabolic approximation of sine, evaluated per component as x * |x| - x.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate sine for.
+//
+// @returns
+// The approximate sine of each component of x.
+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
+{
+ return x * abs(x) - x; // abs() keeps the parabola odd-symmetric around zero
+}
+
+// An approximation of cosine, obtained by re-phasing the input and calling ffxParabolicSin.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of x.
+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
+{
+ FfxFloat32 wrapped = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // halve, offset by 3/4, keep the fractional part
+ FfxFloat32 phase = wrapped * FfxFloat32(2.0) - FfxFloat32(1.0); // rescale by 2 and shift by -1 for the sine helper
+ return ffxParabolicSin(phase);
+}
+
+// An approximation of cosine, obtained per component by re-phasing the input and calling ffxParabolicSin.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The values to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of each component of x.
+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
+{
+ FfxFloat32x2 wrapped = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); // halve, offset by 3/4, keep the fractional part
+ FfxFloat32x2 phase = wrapped * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); // rescale by 2 and shift by -1 for the sine helper
+ return ffxParabolicSin(phase);
+}
+
+// An approximation of both sine and cosine, evaluated with a single two-component ffxParabolicSin call.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x The value to calculate approximate sine and cosine for.
+//
+// @returns
+// A FfxFloat32x2 containing approximations of both sine (.x) and cosine (.y) of x.
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
+{
+ FfxFloat32 wrapped = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); // same re-phasing as ffxParabolicCos
+ FfxFloat32 cosPhase = wrapped * FfxFloat32(2.0) - FfxFloat32(1.0);
+ return ffxParabolicSin(FfxFloat32x2(x, cosPhase));
+}
+
+/// Conditional free logic AND operation using values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
+{
+ return min(x, y); // with inputs limited to 0 or 1, min() yields 1 only when both are 1
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+ return min(x, y); // with inputs limited to 0 or 1, min() yields 1 only when both are 1
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+ return min(x, y); // with inputs limited to 0 or 1, min() yields 1 only when both are 1
+}
+
+/// Conditional free logic AND operation using two values.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+ return min(x, y); // with inputs limited to 0 or 1, min() yields 1 only when both are 1
+}
+
+/// Conditional free logic NOT operation using a single value.
+///
+/// @param [in] x The value to be fed into the NOT operator (expected to be 0 or 1).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
+{
+ return x ^ FfxUInt32(1); // flip the lowest bit: 0 -> 1, 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a single value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0 or 1).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
+{
+ return x ^ ffxBroadcast2(1u); // flip the lowest bit: 0 -> 1, 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a single value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0 or 1).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
+{
+ return x ^ ffxBroadcast3(1u); // flip the lowest bit: 0 -> 1, 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a single value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0 or 1).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
+{
+ return x ^ ffxBroadcast4(1u); // flip the lowest bit: 0 -> 1, 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
+{
+ return max(x, y); // with inputs limited to 0 or 1, max() yields 1 when either is 1
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+ return max(x, y); // with inputs limited to 0 or 1, max() yields 1 when either is 1
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+ return max(x, y); // with inputs limited to 0 or 1, max() yields 1 when either is 1
+}
+
+/// Conditional free logic OR operation using two values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+ return max(x, y); // with inputs limited to 0 or 1, max() yields 1 when either is 1
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32 value, returned as an unsigned integer.
+///
+/// @param [in] x The value to be fed into the NOT operator (expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation, converted to FfxUInt32.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
+{
+ return FfxUInt32(FfxFloat32(1.0) - x); // 1 - x inverts a 0/1 value before the integer conversion
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x2 value, returned as unsigned integers.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation, converted to FfxUInt32x2.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
+{
+ return FfxUInt32x2(ffxBroadcast2(1.0) - x); // 1 - x inverts a 0/1 value before the integer conversion
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x3 value, returned as unsigned integers.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation, converted to FfxUInt32x3.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
+{
+ return FfxUInt32x3(ffxBroadcast3(1.0) - x); // 1 - x inverts a 0/1 value before the integer conversion
+}
+
+/// Conditional free logic NOT operation on a FfxFloat32x4 value, returned as unsigned integers.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation, converted to FfxUInt32x4.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
+{
+ return FfxUInt32x4(ffxBroadcast4(1.0) - x); // 1 - x inverts a 0/1 value before the integer conversion
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+ return ffxSaturate(x * y + z); // multiply acts as AND, add as OR; ffxSaturate presumably clamps 1 + 1 back to 1
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+ return ffxSaturate(x * y + z); // multiply acts as AND, add as OR; ffxSaturate presumably clamps 1 + 1 back to 1
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+ return ffxSaturate(x * y + z); // multiply acts as AND, add as OR; ffxSaturate presumably clamps 1 + 1 back to 1
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+ return ffxSaturate(x * y + z); // multiply acts as AND, add as OR; ffxSaturate presumably clamps 1 + 1 back to 1
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
+{
+ return ffxSaturate(x * FfxFloat32(FFXM_POSITIVE_INFINITY_FLOAT)); // scale by the +infinity constant, then saturate: a branch-free compare (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
+{
+ return ffxSaturate(x * ffxBroadcast2(FFXM_POSITIVE_INFINITY_FLOAT)); // scale by the +infinity constant, then saturate: a branch-free compare (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
+{
+ return ffxSaturate(x * ffxBroadcast3(FFXM_POSITIVE_INFINITY_FLOAT)); // scale by the +infinity constant, then saturate: a branch-free compare (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
+{
+ return ffxSaturate(x * ffxBroadcast4(FFXM_POSITIVE_INFINITY_FLOAT)); // scale by the +infinity constant, then saturate: a branch-free compare (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Conditional free logic NOT operation using a single FfxFloat32 value.
+///
+/// @param [in] x The value to be fed into the NOT operator (expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
+{
+ return FfxFloat32(1.0) - x; // 1 - x maps 0 -> 1 and 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a FfxFloat32x2 value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
+{
+ return ffxBroadcast2(1.0) - x; // 1 - x maps 0 -> 1 and 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a FfxFloat32x3 value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
+{
+ return ffxBroadcast3(1.0) - x; // 1 - x maps 0 -> 1 and 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic NOT operation applied to each component of a FfxFloat32x4 value.
+///
+/// @param [in] x The value to be fed into the NOT operator (components expected to be 0.0 or 1.0).
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
+{
+ return ffxBroadcast4(1.0) - x; // 1 - x maps 0 -> 1 and 1 -> 0 (the name says "And", the operation is NOT)
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
+{
+ return max(x, y); // with inputs limited to 0.0 or 1.0, max() yields 1.0 when either is 1.0
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+ return max(x, y); // with inputs limited to 0.0 or 1.0, max() yields 1.0 when either is 1.0
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+ return max(x, y); // with inputs limited to 0.0 or 1.0, max() yields 1.0 when either is 1.0
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x The first value to be fed into the OR operator.
+/// @param [in] y The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+ return max(x, y); // with inputs limited to 0.0 or 1.0, max() yields 1.0 when either is 1.0
+}
+
+/// Choose between two FfxFloat32 values using a zero-or-one selector, without branching.
+///
+/// @param [in] x The selector, expected to be 0.0 or 1.0 (other values give a linear blend of y and z).
+/// @param [in] y The value to return when x is 1.0.
+/// @param [in] z The value to return when x is 0.0.
+///
+/// @returns
+/// The selected value, computed as x * y + (z - x * z).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+ FfxFloat32 r = (-x) * z + z; // z scaled by (1 - x)
+ return x * y + r;
+}
+
+/// Choose between two FfxFloat32x2 values per component using a zero-or-one selector, without branching.
+///
+/// @param [in] x The selector, components expected to be 0.0 or 1.0 (other values give a linear blend of y and z).
+/// @param [in] y The value to return when x is 1.0.
+/// @param [in] z The value to return when x is 0.0.
+///
+/// @returns
+/// The selected value, computed as x * y + (z - x * z).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+ FfxFloat32x2 r = (-x) * z + z; // z scaled by (1 - x)
+ return x * y + r;
+}
+
+/// Choose between two FfxFloat32x3 values per component using a zero-or-one selector, without branching.
+///
+/// @param [in] x The selector, components expected to be 0.0 or 1.0 (other values give a linear blend of y and z).
+/// @param [in] y The value to return when x is 1.0.
+/// @param [in] z The value to return when x is 0.0.
+///
+/// @returns
+/// The selected value, computed as x * y + (z - x * z).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+ FfxFloat32x3 r = (-x) * z + z; // z scaled by (1 - x)
+ return x * y + r;
+}
+
+/// Choose between two FfxFloat32x4 values per component using a zero-or-one selector, without branching.
+///
+/// @param [in] x The selector, components expected to be 0.0 or 1.0 (other values give a linear blend of y and z).
+/// @param [in] y The value to return when x is 1.0.
+/// @param [in] z The value to return when x is 0.0.
+///
+/// @returns
+/// The selected value, computed as x * y + (z - x * z).
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+ FfxFloat32x4 r = (-x) * z + z; // z scaled by (1 - x)
+ return x * y + r;
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
+{
+ return ffxSaturate(x * FfxFloat32(FFXM_NEGATIVE_INFINITY_FLOAT)); // scale by the -infinity constant so negative inputs turn positive, then saturate (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
+{
+ return ffxSaturate(x * ffxBroadcast2(FFXM_NEGATIVE_INFINITY_FLOAT)); // scale by the -infinity constant so negative inputs turn positive, then saturate (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
+{
+ return ffxSaturate(x * ffxBroadcast3(FFXM_NEGATIVE_INFINITY_FLOAT)); // scale by the -infinity constant so negative inputs turn positive, then saturate (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x The value to be compared.
+///
+/// @returns
+/// Result of the sign value.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
+{
+ return ffxSaturate(x * ffxBroadcast4(FFXM_NEGATIVE_INFINITY_FLOAT)); // scale by the -infinity constant so negative inputs turn positive, then saturate (assumes ffxSaturate clamps to [0, 1])
+}
+
+/// Compute a Rec.709 encoded value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+///
+/// @param [in] color The linear color to convert to Rec. 709.
+/// @returns The color encoded in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); // x: curve scale, y: curve offset
+ FfxFloat32 linearPart = color * j.y;
+ return max(min(j.x, linearPart), min(max(j.x, linearPart), pow(color, j.z) * k.x + k.y));
+}
+
+/// Compute a Rec.709 encoded value from a linear value, per component.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+///
+/// @param [in] color The linear color to convert to Rec. 709.
+/// @returns The color encoded in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); // x: curve scale, y: curve offset
+ FfxFloat32x2 linearPart = color * j.yy;
+ return max(min(j.xx, linearPart), min(max(j.xx, linearPart), pow(color, j.zz) * k.xx + k.yy));
+}
+
+/// Compute a Rec.709 encoded value from a linear value, per component.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+///
+/// @param [in] color The linear color to convert to Rec. 709.
+/// @returns The color encoded in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); // x: curve scale, y: curve offset
+ FfxFloat32x3 linearPart = color * j.yyy;
+ return max(min(j.xxx, linearPart), min(max(j.xxx, linearPart), pow(color, j.zzz) * k.xxx + k.yyy));
+}
+
+/// Compute a gamma value from a linear value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power)
+{
+ return pow(value, FfxFloat32(power)); // the exponent is used as passed; the caller supplies the reciprocal
+}
+
+/// Compute a gamma value from a linear value, per component.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power)
+{
+ return pow(value, ffxBroadcast2(power)); // the scalar exponent is broadcast and used as passed
+}
+
+/// Compute a gamma value from a linear value, per component.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'power' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma.
+///
+/// @param [in] value The value to convert to gamma space from linear.
+/// @param [in] power The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power)
+{
+ return pow(value, ffxBroadcast3(power)); // the scalar exponent is broadcast and used as passed
+}
+
+/// Compute a PQ value from a linear value (despite the name, this is the linear -> PQ direction).
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxPQToLinear(FfxFloat32 value)
+{
+ FfxFloat32 p = pow(value, FfxFloat32(0.159302)); // inner power term shared by numerator and denominator
+ return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); // rational function of p raised to the outer exponent
+}
+
+/// Compute a PQ value from a linear value, per component (despite the name, this is the linear -> PQ direction).
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value)
+{
+ FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); // inner power term shared by numerator and denominator
+ return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); // rational function of p raised to the outer exponent
+}
+
+/// Compute a PQ value from a linear value, per component (despite the name, this is the linear -> PQ direction).
+///
+/// @param [in] value The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value)
+{
+ FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); // inner power term shared by numerator and denominator
+ return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); // rational function of p raised to the outer exponent
+}
+
+/// Compute a SRGB encoded value from a linear value (despite the name, this is the linear -> SRGB direction).
+///
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+/// @param [in] value The linear value to convert to SRGB.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat32 ffxSrgbToLinear(FfxFloat32 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); // x: curve scale, y: curve offset
+ FfxFloat32 linearPart = value * j.y;
+ return max(min(j.x, linearPart), min(max(j.x, linearPart), pow(value, j.z) * k.x + k.y));
+}
+
+/// Compute a SRGB encoded value from a linear value, per component (despite the name, linear -> SRGB).
+///
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+/// @param [in] value The linear value to convert to SRGB.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); // x: curve scale, y: curve offset
+ FfxFloat32x2 linearPart = value * j.yy;
+ return max(min(j.xx, linearPart), min(max(j.xx, linearPart), pow(value, j.zz) * k.xx + k.yy));
+}
+
+/// Compute a SRGB encoded value from a linear value, per component (despite the name, linear -> SRGB).
+///
+/// The result is the median of {threshold, linear segment, curve}; it is spelled out with min/max because
+/// clamp() is not guaranteed to return the median when its lower bound exceeds its upper bound.
+/// @param [in] value The linear value to convert to SRGB.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); // x: encoded threshold, y: linear slope, z: curve exponent
+ FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); // x: curve scale, y: curve offset
+ FfxFloat32x3 linearPart = value * j.yyy;
+ return max(min(j.xxx, linearPart), min(max(j.xxx, linearPart), pow(value, j.zzz) * k.xxx + k.yyy));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color The REC.709 encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081, 1.0 / 4.5, 1.0 / 0.45); // x: breakpoint in the encoded domain (0.018 * 4.5), since color is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); // below the breakpoint: linear segment, otherwise: power curve
+}
+
+/// Compute a linear value from a REC.709 value, per component.
+///
+/// @param [in] color The REC.709 encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081, 1.0 / 4.5, 1.0 / 0.45); // x: breakpoint in the encoded domain (0.018 * 4.5), since color is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); // below the breakpoint: linear segment, otherwise: power curve
+}
+
+/// Compute a linear value from a REC.709 value, per component.
+///
+/// @param [in] color The REC.709 encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.081, 1.0 / 4.5, 1.0 / 0.45); // x: breakpoint in the encoded domain (0.018 * 4.5), since color is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); // below the breakpoint: linear segment, otherwise: power curve
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
+{
+ return pow(color, FfxFloat32(power)); // raise to the caller-supplied gamma exponent
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
+{
+ return pow(color, ffxBroadcast2(power)); // the scalar gamma exponent is broadcast to every component
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color The value to convert to linear in gamma space.
+/// @param [in] power The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
+{
+ return pow(color, ffxBroadcast3(power)); // the scalar gamma exponent is broadcast to every component
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// The two exponents are the reciprocals of the ones used by ffxPQToLinear (1/78.8438 and 1/0.159302).
+///
+/// @param [in] value The PQ encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromPQ(FfxFloat32 value)
+{
+ FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); // undo the outer exponent first
+ return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); // solve the rational term, then undo the inner exponent
+}
+
+/// Compute a linear value from a value in a PQ space, per component.
+///
+/// The two exponents are the reciprocals of the ones used by ffxPQToLinear (1/78.8438 and 1/0.159302).
+///
+/// @param [in] value The PQ encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value)
+{
+ FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); // undo the outer exponent first
+ return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); // solve the rational term, then undo the inner exponent
+}
+
+/// Compute a linear value from a value in a PQ space, per component.
+///
+/// The two exponents are the reciprocals of the ones used by ffxPQToLinear (1/78.8438 and 1/0.159302).
+///
+/// @param [in] value The PQ encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value)
+{
+ FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); // undo the outer exponent first
+ return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); // solve the rational term, then undo the inner exponent
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// Inputs below the encoded breakpoint 0.04045 use the linear segment; all others use the power curve.
+///
+/// @param [in] value The SRGB encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: breakpoint in the encoded domain, since value is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a value in a SRGB space, per component.
+///
+/// Inputs below the encoded breakpoint 0.04045 use the linear segment; all others use the power curve.
+///
+/// @param [in] value The SRGB encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: breakpoint in the encoded domain, since value is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a value in a SRGB space, per component.
+///
+/// Inputs below the encoded breakpoint 0.04045 use the linear segment; all others use the power curve.
+///
+/// @param [in] value The SRGB encoded value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
+{
+ FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); // x: breakpoint in the encoded domain, since value is encoded
+ FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); // x: inverse curve scale, y: offset
+ return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+/// 543210
+/// ======
+/// ..xxx.
+/// yy...y
+///
+/// @param [in] a The input 1D coordinates to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
+{
+ return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); // .x: 3-bit field at offset 1; .y: 3-bit field at offset 3 with (presumably) its lowest bit taken from a
+}
+
+/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices to 8x8 remapping is performed as follows:
+///
+/// 00 01 08 09 10 11 18 19
+/// 02 03 0a 0b 12 13 1a 1b
+/// 04 05 0c 0d 14 15 1c 1d
+/// 06 07 0e 0f 16 17 1e 1f
+/// 20 21 28 29 30 31 38 39
+/// 22 23 2a 2b 32 33 3a 3b
+/// 24 25 2c 2d 34 35 3c 3d
+/// 26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
+{
+ return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); // .x and .y are each merged from two bit fields of a; helper semantics assumed from their names - TODO confirm against their definitions
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta
new file mode 100644
index 0000000..e85b1e0
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 049e52a8031c0c44f9c2b503e90b844e
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h
new file mode 100644
index 0000000..3f8ac4b
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h
@@ -0,0 +1,2978 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#if FFXM_HALF
+#if FFXM_HLSL_6_2
+/// 16bit positive infinity, built from the bit pattern 0x7c00 (explicitly cast to uint16_t on this path).
+///
+/// @ingroup GPUCore
+#define FFXM_POSITIVE_INFINITY_HALF FFXM_TO_FLOAT16((uint16_t)0x7c00u)
+
+/// 16bit negative infinity, built from the bit pattern 0xfc00 (explicitly cast to uint16_t on this path).
+///
+/// @ingroup GPUCore
+#define FFXM_NEGATIVE_INFINITY_HALF FFXM_TO_FLOAT16((uint16_t)0xfc00u)
+#else
+/// 16bit positive infinity, built from the bit pattern 0x7c00 (no explicit cast on this path).
+///
+/// @ingroup GPUCore
+#define FFXM_POSITIVE_INFINITY_HALF FFXM_TO_FLOAT16(0x7c00u)
+
+/// 16bit negative infinity, built from the bit pattern 0xfc00 (no explicit cast on this path).
+///
+/// @ingroup GPUCore
+#define FFXM_NEGATIVE_INFINITY_HALF FFXM_TO_FLOAT16(0xfc00u)
+#endif // FFXM_HLSL_6_2
+
+/// Compute the minimum of two FfxFloat16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxFloat16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxFloat16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxFloat16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the minimum of two FfxInt16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxInt16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxInt16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxInt16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the minimum of two FfxUInt16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxUInt16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxUInt16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+ return min(x, y);
+}
+
+/// Compute the per-component minimum of two FfxUInt16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The lower of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+ return min(x, y);
+}
+
+/// Compute the maximum of two FfxFloat16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxFloat16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxFloat16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxFloat16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute the maximum of two FfxInt16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxInt16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxInt16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxInt16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute the maximum of two FfxUInt16 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxUInt16x2 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxUInt16x3 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+ return max(x, y);
+}
+
+/// Compute the per-component maximum of two FfxUInt16x4 values.
+///
+/// @param [in] x The first value to compare.
+/// @param [in] y The second value to compare.
+///
+/// @returns
+/// The higher of the two values, selected per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+ return max(x, y);
+}
+
+/// Compute x raised to the power y for FfxFloat16 values.
+///
+/// @param [in] x The base, i.e. the value to raise to the power y.
+/// @param [in] y The exponent, i.e. the power to which to raise x.
+///
+/// @returns
+/// The result of pow(x, y).
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
+{
+ return pow(x, y);
+}
+
+/// Compute x raised to the power y, per component, for FfxFloat16x2 values.
+///
+/// @param [in] x The base, i.e. the value to raise to the power y.
+/// @param [in] y The exponent, i.e. the power to which to raise x.
+///
+/// @returns
+/// The result of pow(x, y), evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+ return pow(x, y);
+}
+
+/// Compute x raised to the power y, per component, for FfxFloat16x3 values.
+///
+/// @param [in] x The base, i.e. the value to raise to the power y.
+/// @param [in] y The exponent, i.e. the power to which to raise x.
+///
+/// @returns
+/// The result of pow(x, y), evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+ return pow(x, y);
+}
+
+/// Compute x raised to the power y, per component, for FfxFloat16x4 values.
+///
+/// @param [in] x The base, i.e. the value to raise to the power y.
+/// @param [in] y The exponent, i.e. the power to which to raise x.
+///
+/// @returns
+/// The result of pow(x, y), evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+ return pow(x, y);
+}
+
+/// Compute the square root of an FfxFloat16 value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSqrt(FfxFloat16 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the per-component square root of an FfxFloat16x2 value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the per-component square root of an FfxFloat16x3 value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
+{
+ return sqrt(x);
+}
+
+/// Compute the per-component square root of an FfxFloat16x4 value.
+///
+/// @param [in] x The value to compute the square root of.
+///
+/// @returns
+/// The square root of x, evaluated per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
+{
+ return sqrt(x);
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d The value to copy the sign bit into; its own sign bit is kept (OR-ed), so it is expected to be positive.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The bits of d with the sign bit (0x8000) of s OR-ed in.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
+{
+ return FFXM_TO_FLOAT16(FFXM_TO_UINT16(d) | (FFXM_TO_UINT16(s) & FFXM_BROADCAST_UINT16(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (FfxFloat16x2 overload).
+///
+/// @param [in] d The value to copy the sign bit into; its own sign bit is kept (OR-ed), so it is expected to be positive.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The bits of d with the sign bit (0x8000) of s OR-ed in.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
+{
+ return FFXM_TO_FLOAT16X2(FFXM_TO_UINT16X2(d) | (FFXM_TO_UINT16X2(s) & FFXM_BROADCAST_UINT16X2(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (FfxFloat16x3 overload).
+///
+/// @param [in] d The value to copy the sign bit into; its own sign bit is kept (OR-ed), so it is expected to be positive.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The bits of d with the sign bit (0x8000) of s OR-ed in.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
+{
+ return FFXM_TO_FLOAT16X3(FFXM_TO_UINT16X3(d) | (FFXM_TO_UINT16X3(s) & FFXM_BROADCAST_UINT16X3(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (FfxFloat16x4 overload).
+///
+/// @param [in] d The value to copy the sign bit into; its own sign bit is kept (OR-ed), so it is expected to be positive.
+/// @param [in] s The value to copy the sign bit from.
+///
+/// @returns
+/// The bits of d with the sign bit (0x8000) of s OR-ed in.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
+{
+ return FFXM_TO_FLOAT16X4(FFXM_TO_UINT16X4(d) | (FFXM_TO_UINT16X4(s) & FFXM_BROADCAST_UINT16X4(0x8000u)));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16(FFXM_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x2 overload):
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X2(FFXM_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x3 overload):
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X3(FFXM_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x4 overload):
+/// m = NaN := 0
+/// m >= 0 := 0
+/// m < 0 := 1
+///
+/// Uses the following useful floating point logic,
+/// saturate(+a*(-INF)==-INF) := 0
+/// saturate( 0*(-INF)== NaN) := 0
+/// saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X4(FFXM_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+/// m = NaN := 0 (this and the m == 0 case assume saturate(NaN) == 0 - TODO confirm against ffxSaturate)
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16(FFXM_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x2 overload):
+/// m = NaN := 0 (this and the m == 0 case assume saturate(NaN) == 0 - TODO confirm against ffxSaturate)
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X2(FFXM_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x3 overload):
+/// m = NaN := 0 (this and the m == 0 case assume saturate(NaN) == 0 - TODO confirm against ffxSaturate)
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X3(FFXM_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following (FfxFloat16x4 overload):
+/// m = NaN := 0 (this and the m == 0 case assume saturate(NaN) == 0 - TODO confirm against ffxSaturate)
+/// m > 0 := 1
+/// m <= 0 := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
+{
+ return ffxSaturate(m * FFXM_BROADCAST_FLOAT16X4(FFXM_POSITIVE_INFINITY_HALF));
+}
+
+/// Convert the bit pattern of a 16bit floating point value to a sortable integer.
+///
+/// - If sign bit=0, flip the sign bit (positives).
+/// - If sign bit=1, flip all bits (negatives); presumably relies on ffxBitShiftRightHalf replicating the sign bit - TODO confirm.
+///
+/// The function has the side effects that:
+/// - Larger integers are more positive values.
+/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] x The bit pattern of the floating point value to make sortable.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
+{
+ return x ^ ((ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16(15))) | FFXM_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert a sortable integer back to the bit pattern of a 16bit floating point value.
+///
+/// The conversion works as follows:
+/// - If sign bit=1, flip the sign bit (positives).
+/// - If sign bit=0, flip all bits (negatives); presumably relies on ffxBitShiftRightHalf replicating the sign bit - TODO confirm.
+///
+/// @param [in] x The sortable integer value to make floating point.
+///
+/// @returns
+/// The bit pattern of the floating point value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
+{
+ return x ^ ((~ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16(15))) | FFXM_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert the bit patterns of a pair of 16bit floating point values to a pair of sortable integers.
+///
+/// - If sign bit=0, flip the sign bit (positives).
+/// - If sign bit=1, flip all bits (negatives); presumably relies on ffxBitShiftRightHalf replicating the sign bit - TODO confirm.
+///
+/// The function has the side effects that:
+/// - Larger integers are more positive values.
+/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] x The bit patterns of the floating point values to make sortable.
+///
+/// @returns
+/// The sortable integer values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
+{
+ return x ^ ((ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16X2(15))) | FFXM_BROADCAST_UINT16X2(0x8000));
+}
+
+/// Convert a pair of sortable integers back to the bit patterns of a pair of 16bit floating point values.
+///
+/// The conversion works as follows:
+/// - If sign bit=1, flip the sign bit (positives).
+/// - If sign bit=0, flip all bits (negatives); presumably relies on ffxBitShiftRightHalf replicating the sign bit - TODO confirm.
+///
+/// @param [in] x The sortable integer values to make floating point.
+///
+/// @returns
+/// The bit patterns of the floating point values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
+{
+ return x ^ ((~ffxBitShiftRightHalf(x, FFXM_BROADCAST_UINT16X2(15))) | FFXM_BROADCAST_UINT16X2(0x8000));
+}
+
+/// Packs byte 0 of the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y0 [Zero] X0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[0:7], bits [16:23] = i.y[0:7], all other bits zero.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
+{
+ return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u);
+}
+
+/// Packs byte 1 of the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y1 [Zero] X1
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[8:15], bits [16:23] = i.y[8:15], all other bits zero.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
+{
+ return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u);
+}
+
+/// Packs byte 2 of the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y2 [Zero] X2
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[16:23], bits [16:23] = i.y[16:23], all other bits zero.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
+{
+ return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u);
+}
+
+/// Packs byte 3 of the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y3 [Zero] X3
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[24:31], bits [16:23] = i.y[24:31], all other bits zero.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
+{
+ return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u);
+}
+
+/// Replaces byte 0 of the Y component of a FfxUInt32x2 with byte 0 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[0:7], bits [8:31] = i.y[8:31].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
+{
+ return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u);
+}
+
+/// Replaces byte 0 of the Y component of a FfxUInt32x2 with byte 2 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X2
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[16:23], bits [8:31] = i.y[8:31].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
+{
+ return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u);
+}
+
+/// Replaces byte 1 of the Y component of a FfxUInt32x2 with byte 0 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X0 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [8:15] = i.x[0:7], every other bit taken from the same position of i.y.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
+{
+ return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
+}
+
+/// Replaces byte 1 of the Y component of a FfxUInt32x2 with byte 2 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X2 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [8:15] = i.x[16:23], every other bit taken from the same position of i.y.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
+{
+ return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
+}
+
+/// Replaces byte 2 of the Y component of a FfxUInt32x2 with byte 0 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X0 Y1 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [16:23] = i.x[0:7], every other bit taken from the same position of i.y.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
+{
+ return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu);
+}
+
+/// Replaces byte 2 of the Y component of a FfxUInt32x2 with byte 2 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X2 Y1 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [16:23] = i.x[16:23], every other bit taken from the same position of i.y.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
+{
+ return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu);
+}
+
+/// Replaces byte 3 of the Y component of a FfxUInt32x2 with byte 0 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X0 Y2 Y1 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [24:31] = i.x[0:7], bits [0:23] = i.y[0:23].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
+{
+ return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu);
+}
+
+/// Replaces byte 3 of the Y component of a FfxUInt32x2 with byte 2 of its X component.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X2 Y2 Y1 Y0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [24:31] = i.x[16:23], bits [0:23] = i.y[0:23].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
+{
+ return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu);
+}
+
+/// Interleaves bytes 0 and 2 of the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 X2 Y0 X0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[0:7], [8:15] = i.y[0:7], [16:23] = i.x[16:23], [24:31] = i.y[16:23].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
+{
+ return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u);
+}
+
+/// Packs bytes 0 and 2 of the X component followed by bytes 0 and 2 of the Y component of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 Y0 X2 X0
+///
+/// @param [in] i The integer pair to pack; Xn / Yn denote byte n of i.x / i.y, byte 0 being the least significant.
+///
+/// @returns
+/// The packed integer value: bits [0:7] = i.x[0:7], [8:15] = i.x[16:23], [16:23] = i.y[0:7], [24:31] = i.y[16:23].
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
+{
+ return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u));
+}
+
+/// Takes two Float16x2 values x and y, scales them by 1.0 / 32768.0 and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x The first float16x2 value to pack.
+/// @param [in] y The second float16x2 value to pack.
+///
+/// @returns
+/// The packed FfxUInt16x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+ x *= FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+ y *= FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+ return FFXM_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(x)), FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(y)))));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],
+/// d.y[16:23] (byte 2, as selected by ffxPackBytesY3Y2Y1X2) into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte0Float16x2ToUint2(d,i)
+/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+/// Where 'k1' is an SGPR with 0x????
+/// Where 'k2' is an SGPR with 0x????
+/// V_PK_FMA_F16 i,i,k0.x,0
+/// V_PERM_B32 r.x,i,i,k1
+/// V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d The FfxUInt32x2 value to be packed; only byte 0 of d.x and byte 2 of d.y are used.
+/// @param [in] i The FfxFloat16x2 value to be packed; it is scaled by 1.0 / 32768.0 before its bits are taken.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+ FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
+ return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b)));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],
+/// d.y[16:23] (byte 2, as selected by ffxPackBytesY3Y2X2Y0) into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte1Float16x2ToUint2(d,i)
+/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+/// Where 'k1' is an SGPR with 0x????
+/// Where 'k2' is an SGPR with 0x????
+/// V_PK_FMA_F16 i,i,k0.x,0
+/// V_PERM_B32 r.x,i,i,k1
+/// V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d The FfxUInt32x2 value to be packed; only byte 0 of d.x and byte 2 of d.y are used.
+/// @param [in] i The FfxFloat16x2 value to be packed; it is scaled by 1.0 / 32768.0 before its bits are taken.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+ FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
+ return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b)));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],
+/// d.y[16:23] (byte 2, as selected by ffxPackBytesY3X2Y1Y0) into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// r=ffxPermuteUByte2Float16x2ToUint2(d,i)
+/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+/// Where 'k1' is an SGPR with 0x????
+/// Where 'k2' is an SGPR with 0x????
+/// V_PK_FMA_F16 i,i,k0.x,0
+/// V_PERM_B32 r.x,i,i,k1
+/// V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d The FfxUInt32x2 value to be packed; only byte 0 of d.x and byte 2 of d.y are used.
+/// @param [in] i The FfxFloat16x2 value to be packed; it is scaled by 1.0 / 32768.0 before its bits are taken.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+ FfxUInt32 b = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
+ return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b)));
+}
+
+/// Byte-permute pack for unsigned bytes. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[24:31],
+/// d.y[0:7] to r.y[24:31], i.x[0:15] to r.x[0:15], r.y[0:15] and i.y[0:7] to r.x[16:23], r.y[16:23], using 3 ops.
+///
+/// r=ffxPermuteUByte3Float16x2ToUint2(d,i)
+/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+/// Where 'k1' is an SGPR with 0x????
+/// Where 'k2' is an SGPR with 0x????
+/// V_PK_FMA_F16 i,i,k0.x,0
+/// V_PERM_B32 r.x,i,i,k1
+/// V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 scaled = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0); // the 1/32768 scale matches 'k0' in the sketch above
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(scaled));
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute unpack for unsigned bytes: moves i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] of the Float16x2 result r (2 ops); the converted value is multiplied by 32768.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY0ZeroX0(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0);
+}
+
+/// Byte-permute unpack for unsigned bytes: moves i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] of the Float16x2 result r (2 ops); the converted value is multiplied by 32768.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY1ZeroX1(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0);
+}
+
+/// Byte-permute unpack for unsigned bytes: moves i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] of the Float16x2 result r (2 ops); the converted value is multiplied by 32768.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY2ZeroX2(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0);
+}
+
+/// Byte-permute unpack for unsigned bytes: moves i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] of the Float16x2 result r (2 ops); the converted value is multiplied by 32768.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY3ZeroX3(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0);
+}
+
+/// Normalizes two Float16x2 values x and y (scale by 1/32768, bias by 0.25/32768) and builds one Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x The first float16x2 value to pack.
+/// @param [in] y The second float16x2 value to pack.
+///
+/// @returns The packed FfxUInt16x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 biasedX = x * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxFloat16x2 biasedY = y * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32x2 halfBits = FfxUInt32x2(FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biasedX)), FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biasedY)));
+    return FFXM_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(halfBits));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[0:7],
+/// d.y[0:7] to r.y[0:7], i.x[8:15] to r.x[8:15], r.y[8:15] and i.y[0:15] to r.x[16:31], r.y[16:31], using 3 ops.
+///
+/// Handles signed byte values: a 0.25/32768 bias is added after the 1/32768 scale.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased));
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[8:15],
+/// d.y[0:7] to r.y[8:15], i.x[0:7] to r.x[0:7], r.y[0:7] and i.y[0:15] to r.x[16:31], r.y[16:31], using 3 ops.
+///
+/// Handles signed byte values: a 0.25/32768 bias is added after the 1/32768 scale.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased));
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[16:23],
+/// d.y[0:7] to r.y[16:23], i.x[0:15] to r.x[0:15], r.y[0:15] and i.y[8:15] to r.x[24:31], r.y[24:31], using 3 ops.
+///
+/// Handles signed byte values: a 0.25/32768 bias is added after the 1/32768 scale.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased));
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[24:31],
+/// d.y[0:7] to r.y[24:31], i.x[0:15] to r.x[0:15], r.y[0:15] and i.y[0:7] to r.x[16:23], r.y[16:23], using 3 ops.
+///
+/// Handles signed byte values: a 0.25/32768 bias is added after the 1/32768 scale.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased));
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[0:7],
+/// d.y[0:7] to r.y[0:7], i.x[8:15] to r.x[8:15], r.y[8:15] and i.y[0:15] to r.x[16:31], r.y[16:31], using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero); here that is the XOR with 0x00800080.
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased)) ^ 0x00800080u;
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[8:15],
+/// d.y[0:7] to r.y[8:15], i.x[0:7] to r.x[0:7], r.y[0:7] and i.y[0:15] to r.x[16:31], r.y[16:31], using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero); here that is the XOR with 0x00800080.
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased)) ^ 0x00800080u;
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[16:23],
+/// d.y[0:7] to r.y[16:23], i.x[0:15] to r.x[0:15], r.y[0:15] and i.y[8:15] to r.x[24:31], r.y[24:31], using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero); here that is the XOR with 0x00800080.
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased)) ^ 0x00800080u;
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute pack. With d a FfxUInt32x2, i a Float16x2 and r the FfxUInt32x2 result, d.x[0:7] goes to r.x[24:31],
+/// d.y[0:7] to r.y[24:31], i.x[0:15] to r.x[0:15], r.y[0:15] and i.y[0:7] to r.x[16:23], r.y[16:23], using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero); here that is the XOR with 0x00800080.
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d The FfxUInt32x2 value supplying the d.x / d.y bytes.
+/// @param [in] i The FfxFloat16x2 value to be scaled, biased and packed.
+///
+/// @returns The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxFloat16x2 biased = i * FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFXM_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 halfBits = FFXM_UINT16X2_TO_UINT32(FFXM_TO_UINT16X2(biased)) ^ 0x00800080u;
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, halfBits)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, halfBits)));
+}
+
+/// Byte-permute unpack: moves i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values: the converted value is multiplied by 32768 and 0.25 is subtracted.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY0ZeroX0(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values: the converted value is multiplied by 32768 and 0.25 is subtracted.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY1ZeroX1(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values: the converted value is multiplied by 32768 and 0.25 is subtracted.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY2ZeroX2(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values: the converted value is multiplied by 32768 and 0.25 is subtracted.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY3ZeroX3(i);
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values; zero-based variant, so the extracted bytes are XORed with 0x00800080 before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u;
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values; zero-based variant, so the extracted bytes are XORed with 0x00800080 before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u;
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values; zero-based variant, so the extracted bytes are XORed with 0x00800080 before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u;
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Byte-permute unpack: moves i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] of the Float16x2 result r using 2 ops.
+///
+/// Handles signed byte values; zero-based variant, so the extracted bytes are XORed with 0x00800080 before conversion.
+///
+/// @param [in] i The FfxUInt32x2 value to be unpacked.
+/// @returns The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    FfxUInt32 bytePair = ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u;
+    FfxUInt16x2 halves = FFXM_UINT32_TO_UINT16X2(bytePair);
+    return FFXM_TO_FLOAT16X2(halves) * FFXM_BROADCAST_FLOAT16X2(32768.0) - FFXM_BROADCAST_FLOAT16X2(0.25);
+}
+
+/// Computes a low-quality, half-precision approximation of the square root of a value.
+///
+/// The estimate is integer math on FFXM_TO_UINT16(a): shift right by one, then add 0x1de2.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is approximated.
+/// @returns An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
+{
+    FfxUInt16 shift = FFXM_BROADCAST_UINT16(1);
+    FfxUInt16 magic = FFXM_BROADCAST_UINT16(0x1de2);
+    return FFXM_TO_FLOAT16((FFXM_TO_UINT16(a) >> shift) + magic);
+}
+
+/// Computes a low-quality, half-precision approximation of the square root of each component of a two-component value.
+///
+/// The estimate is integer math on FFXM_TO_UINT16X2(a): shift right by one, then add 0x1de2.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is approximated.
+/// @returns An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
+{
+    FfxUInt16x2 shift = FFXM_BROADCAST_UINT16X2(1);
+    FfxUInt16x2 magic = FFXM_BROADCAST_UINT16X2(0x1de2);
+    return FFXM_TO_FLOAT16X2((FFXM_TO_UINT16X2(a) >> shift) + magic);
+}
+
+/// Computes a low-quality, half-precision approximation of the square root of each component of a three-component value.
+///
+/// The estimate is integer math on FFXM_TO_UINT16X3(a): shift right by one, then add 0x1de2.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose square root is approximated.
+/// @returns An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
+{
+    FfxUInt16x3 shift = FFXM_BROADCAST_UINT16X3(1);
+    FfxUInt16x3 magic = FFXM_BROADCAST_UINT16X3(0x1de2);
+    return FFXM_TO_FLOAT16X3((FFXM_TO_UINT16X3(a) >> shift) + magic);
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of a value.
+///
+/// The estimate is the integer difference 0x7784 - FFXM_TO_UINT16(a), converted back with FFXM_TO_FLOAT16.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
+{
+    FfxUInt16 magic = FFXM_BROADCAST_UINT16(0x7784);
+    return FFXM_TO_FLOAT16(magic - FFXM_TO_UINT16(a));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of each component of a two-component value.
+///
+/// The estimate is the integer difference 0x7784 - FFXM_TO_UINT16X2(a), converted back with FFXM_TO_FLOAT16X2.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
+{
+    FfxUInt16x2 magic = FFXM_BROADCAST_UINT16X2(0x7784);
+    return FFXM_TO_FLOAT16X2(magic - FFXM_TO_UINT16X2(a));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of each component of a three-component value.
+///
+/// The estimate is the integer difference 0x7784 - FFXM_TO_UINT16X3(a), converted back with FFXM_TO_FLOAT16X3.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
+{
+    FfxUInt16x3 magic = FFXM_BROADCAST_UINT16X3(0x7784);
+    return FFXM_TO_FLOAT16X3(magic - FFXM_TO_UINT16X3(a));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of each component of a four-component value.
+///
+/// The estimate is the integer difference 0x7784 - FFXM_TO_UINT16X4(a), converted back with FFXM_TO_FLOAT16X4.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
+{
+    FfxUInt16x4 magic = FFXM_BROADCAST_UINT16X4(0x7784);
+    return FFXM_TO_FLOAT16X4(magic - FFXM_TO_UINT16X4(a));
+}
+
+/// Computes a medium-quality, half-precision approximation of the reciprocal of a value.
+///
+/// Starts from the integer estimate 0x778d - FFXM_TO_UINT16(a) and refines it once as b * (-b * a + 2).
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
+{
+    FfxFloat16 estimate = FFXM_TO_FLOAT16(FFXM_BROADCAST_UINT16(0x778d) - FFXM_TO_UINT16(a));
+    FfxFloat16 correction = -estimate * a + FFXM_BROADCAST_FLOAT16(2.0);
+    return estimate * correction;
+}
+
+/// Computes a medium-quality, half-precision approximation of the reciprocal of each component of a two-component value.
+///
+/// Starts from the integer estimate 0x778d - FFXM_TO_UINT16X2(a) and refines it once as b * (-b * a + 2).
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
+{
+    FfxFloat16x2 estimate = FFXM_TO_FLOAT16X2(FFXM_BROADCAST_UINT16X2(0x778d) - FFXM_TO_UINT16X2(a));
+    FfxFloat16x2 correction = -estimate * a + FFXM_BROADCAST_FLOAT16X2(2.0);
+    return estimate * correction;
+}
+
+/// Computes a medium-quality, half-precision approximation of the reciprocal of each component of a three-component value.
+///
+/// Starts from the integer estimate 0x778d - FFXM_TO_UINT16X3(a) and refines it once as b * (-b * a + 2).
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
+{
+    FfxFloat16x3 estimate = FFXM_TO_FLOAT16X3(FFXM_BROADCAST_UINT16X3(0x778d) - FFXM_TO_UINT16X3(a));
+    FfxFloat16x3 correction = -estimate * a + FFXM_BROADCAST_FLOAT16X3(2.0);
+    return estimate * correction;
+}
+
+/// Computes a medium-quality, half-precision approximation of the reciprocal of each component of a four-component value.
+///
+/// Starts from the integer estimate 0x778d - FFXM_TO_UINT16X4(a) and refines it once as b * (-b * a + 2).
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal is approximated.
+///
+/// @returns An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
+{
+    FfxFloat16x4 estimate = FFXM_TO_FLOAT16X4(FFXM_BROADCAST_UINT16X4(0x778d) - FFXM_TO_UINT16X4(a));
+    FfxFloat16x4 correction = -estimate * a + FFXM_BROADCAST_FLOAT16X4(2.0);
+    return estimate * correction;
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of the square root of a value.
+///
+/// The estimate is the integer expression 0x59a3 - (FFXM_TO_UINT16(a) >> 1), converted back with FFXM_TO_FLOAT16.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is approximated.
+/// @returns An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
+{
+    FfxUInt16 shift = FFXM_BROADCAST_UINT16(1);
+    FfxUInt16 magic = FFXM_BROADCAST_UINT16(0x59a3);
+    return FFXM_TO_FLOAT16(magic - (FFXM_TO_UINT16(a) >> shift));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of the square root of each component of a two-component value.
+///
+/// The estimate is the integer expression 0x59a3 - (FFXM_TO_UINT16X2(a) >> 1), converted back with FFXM_TO_FLOAT16X2.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is approximated.
+/// @returns An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
+{
+    FfxUInt16x2 shift = FFXM_BROADCAST_UINT16X2(1);
+    FfxUInt16x2 magic = FFXM_BROADCAST_UINT16X2(0x59a3);
+    return FFXM_TO_FLOAT16X2(magic - (FFXM_TO_UINT16X2(a) >> shift));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of the square root of each component of a three-component value.
+///
+/// The estimate is the integer expression 0x59a3 - (FFXM_TO_UINT16X3(a) >> 1), converted back with FFXM_TO_FLOAT16X3.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is approximated.
+/// @returns An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
+{
+    FfxUInt16x3 shift = FFXM_BROADCAST_UINT16X3(1);
+    FfxUInt16x3 magic = FFXM_BROADCAST_UINT16X3(0x59a3);
+    return FFXM_TO_FLOAT16X3(magic - (FFXM_TO_UINT16X3(a) >> shift));
+}
+
+/// Computes a low-quality, half-precision approximation of the reciprocal of the square root of each component of a four-component value.
+///
+/// The estimate is the integer expression 0x59a3 - (FFXM_TO_UINT16X4(a) >> 1), converted back with FFXM_TO_FLOAT16X4.
+/// Background on this family of approximations can be found in Michal Drobot's presentation materials:
+///
+/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a The value whose reciprocal square root is approximated.
+/// @returns An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
+{
+    FfxUInt16x4 shift = FFXM_BROADCAST_UINT16X4(1);
+    FfxUInt16x4 magic = FFXM_BROADCAST_UINT16X4(0x59a3);
+    return FFXM_TO_FLOAT16X4(magic - (FFXM_TO_UINT16X4(a) >> shift));
+}
+
+/// Parabolic approximation of sine, evaluated as x * |x| - x.
+///
+/// The valid input range {-1 to 1} represents {0 to 2 pi}, and the output range
+/// {-1/4 to 1/4} represents {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine for.
+///
+/// @returns The approximate sine of value.
+FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x)
+{
+    FfxFloat16 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+/// Parabolic approximation of sine for a two-component value, evaluated per component as x * |x| - x.
+///
+/// The valid input range {-1 to 1} represents {0 to 2 pi}, and the output range
+/// {-1/4 to 1/4} represents {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine for.
+///
+/// @returns The approximate sine of value.
+FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x)
+{
+    FfxFloat16x2 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+/// Parabolic approximation of cosine, obtained by remapping the input and calling ffxParabolicSinHalf.
+///
+/// The valid input range {-1 to 1} represents {0 to 2 pi}, and the output range
+/// {-1/4 to 1/4} represents {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate cosine for.
+///
+/// @returns The approximate cosine of value.
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x)
+{
+    // Remap the input for the sine helper: halve, add 0.75, apply ffxFract, then scale by 2 and subtract 1.
+    FfxFloat16 wrapped = ffxFract(x * FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16(0.75));
+    FfxFloat16 remapped = wrapped * FFXM_BROADCAST_FLOAT16(2.0) - FFXM_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(remapped);
+}
+
+/// Parabolic approximation of cosine for a two-component value, obtained by remapping the input and calling ffxParabolicSinHalf.
+///
+/// The valid input range {-1 to 1} represents {0 to 2 pi}, and the output range
+/// {-1/4 to 1/4} represents {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate cosine for.
+///
+/// @returns The approximate cosine of value.
+FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x)
+{
+    // Remap the input for the sine helper: halve, add 0.75, apply ffxFract, then scale by 2 and subtract 1.
+    FfxFloat16x2 wrapped = ffxFract(x * FFXM_BROADCAST_FLOAT16X2(0.5) + FFXM_BROADCAST_FLOAT16X2(0.75));
+    FfxFloat16x2 remapped = wrapped * FFXM_BROADCAST_FLOAT16X2(2.0) - FFXM_BROADCAST_FLOAT16X2(1.0);
+    return ffxParabolicSinHalf(remapped);
+}
+
+/// Parabolic approximation of both sine and cosine, computed with a single two-component ffxParabolicSinHalf call.
+///
+/// The valid input range {-1 to 1} represents {0 to 2 pi}, and the output range
+/// {-1/4 to 1/4} represents {-1 to 1}.
+///
+/// @param [in] x The value to calculate approximate sine and cosine for.
+///
+/// @returns A FfxFloat16x2 containing approximations of both sine (in .x) and cosine (in .y) of value.
+FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
+{
+    // The second lane uses the same input remapping as ffxParabolicCosHalf; the first lane is x unchanged.
+    FfxFloat16 wrapped = ffxFract(x * FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16(0.75));
+    FfxFloat16 cosInput = wrapped * FFXM_BROADCAST_FLOAT16(2.0) - FFXM_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(FfxFloat16x2(x, cosInput));
+}
+
+/// Conditional free logic AND operation on two 16-bit unsigned values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    FfxUInt16 conjunction = min(x, y);
+    return conjunction;
+}
+
+/// Conditional free logic AND operation on two two-component 16-bit unsigned values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    FfxUInt16x2 conjunction = min(x, y);
+    return conjunction;
+}
+
+/// Conditional free logic AND operation on two three-component 16-bit unsigned values, implemented as min(x, y).
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    FfxUInt16x3 conjunction = min(x, y);
+    return conjunction;
+}
+
+/// Branch-free logical AND of two 4-component 16-bit unsigned vectors, evaluated as min(x, y).
+///
+/// @param [in] x Left operand of the AND.
+/// @param [in] y Right operand of the AND.
+///
+/// @returns
+/// The component-wise minimum, which equals x AND y when every component is 0 or 1.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(y, x);
+}
+
+/// Branch-free logical NOT of a 16-bit unsigned value, evaluated as x XOR 1.
+///
+/// @param [in] x The value to invert.
+/// The XOR with 1 is a logical NOT when x is 0 or 1.
+///
+/// @returns
+/// x XOR 1.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
+{
+    return FFXM_BROADCAST_UINT16(1) ^ x;
+}
+
+/// Branch-free logical NOT of a 2-component 16-bit unsigned vector, evaluated as x XOR 1.
+///
+/// @param [in] x The value to invert.
+/// The XOR with 1 is a logical NOT when every component of x is 0 or 1.
+///
+/// @returns
+/// x XOR 1, applied to each component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
+{
+    return FFXM_BROADCAST_UINT16X2(1) ^ x;
+}
+
+/// Branch-free logical NOT of a 3-component 16-bit unsigned vector, evaluated as x XOR 1.
+///
+/// @param [in] x The value to invert.
+/// The XOR with 1 is a logical NOT when every component of x is 0 or 1.
+///
+/// @returns
+/// x XOR 1, applied to each component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
+{
+    return FFXM_BROADCAST_UINT16X3(1) ^ x;
+}
+
+/// Branch-free logical NOT of a 4-component 16-bit unsigned vector, evaluated as x XOR 1.
+///
+/// @param [in] x The value to invert.
+/// The XOR with 1 is a logical NOT when every component of x is 0 or 1.
+///
+/// @returns
+/// x XOR 1, applied to each component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
+{
+    return FFXM_BROADCAST_UINT16X4(1) ^ x;
+}
+
+/// Branch-free logical OR of two 16-bit unsigned values, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The larger operand, which equals x OR y when both operands are 0 or 1.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(y, x);
+}
+
+/// Branch-free logical OR of two 2-component 16-bit unsigned vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0 or 1.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(y, x);
+}
+
+/// Branch-free logical OR of two 3-component 16-bit unsigned vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0 or 1.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(y, x);
+}
+
+/// Branch-free logical OR of two 4-component 16-bit unsigned vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0 or 1.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(y, x);
+}
+
+/// Convert a half-precision float between 0.0 and 1.0 to a 16-bit unsigned integer.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_UINT16.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
+{
+    return FFXM_TO_UINT16(FFXM_TO_FLOAT16(FFXM_TO_UINT16(1)) * x);
+}
+
+/// Convert a 2-component half-precision float vector with components between 0.0 and 1.0 to 16-bit unsigned integers.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_UINT16X2.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
+{
+    return FFXM_TO_UINT16X2(FFXM_TO_FLOAT16X2(FfxUInt16x2(1, 1)) * x);
+}
+
+/// Convert a 3-component half-precision float vector with components between 0.0 and 1.0 to 16-bit unsigned integers.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_UINT16X3.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
+{
+    return FFXM_TO_UINT16X3(FFXM_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1)) * x);
+}
+
+/// Convert a 4-component half-precision float vector with components between 0.0 and 1.0 to 16-bit unsigned integers.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_UINT16X4.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
+{
+    return FFXM_TO_UINT16X4(FFXM_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1)) * x);
+}
+
+/// Convert a 16-bit unsigned integer between 0 and 1 to a half-precision float.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_FLOAT16.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
+{
+    return FFXM_TO_FLOAT16(FFXM_TO_UINT16(FFXM_TO_FLOAT16(1.0)) * x);
+}
+
+/// Convert a 2-component 16-bit unsigned vector with components between 0 and 1 to half-precision floats.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_FLOAT16X2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
+{
+    return FFXM_TO_FLOAT16X2(FFXM_TO_UINT16X2(FfxUInt16x2(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0))) * x);
+}
+
+/// Convert a 3-component 16-bit unsigned vector with components between 0 and 1 to half-precision floats.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_FLOAT16X3.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
+{
+    return FFXM_TO_FLOAT16X3(FFXM_TO_UINT16X3(FfxUInt16x3(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0))) * x);
+}
+
+/// Convert a 4-component 16-bit unsigned vector with components between 0 and 1 to half-precision floats.
+///
+/// @param [in] x The value to convert.
+///
+/// @returns
+/// x multiplied by one and passed through FFXM_TO_FLOAT16X4.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
+{
+    return FFXM_TO_FLOAT16X4(FFXM_TO_UINT16X4(FfxUInt16x4(FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0), FFXM_TO_FLOAT16(1.0))) * x);
+}
+
+/// Branch-free logical AND of two half-precision values, evaluated as min(x, y).
+///
+/// @param [in] x Left operand of the AND.
+/// @param [in] y Right operand of the AND.
+///
+/// @returns
+/// The smaller operand, which equals x AND y when both operands are 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    FfxFloat16 bothSet = min(x, y);
+    return bothSet;
+}
+
+/// Branch-free logical AND of two 2-component half-precision vectors, evaluated as min(x, y).
+///
+/// @param [in] x Left operand of the AND.
+/// @param [in] y Right operand of the AND.
+///
+/// @returns
+/// The component-wise minimum, which equals x AND y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 bothSet = min(x, y);
+    return bothSet;
+}
+
+/// Branch-free logical AND of two 3-component half-precision vectors, evaluated as min(x, y).
+///
+/// @param [in] x Left operand of the AND.
+/// @param [in] y Right operand of the AND.
+///
+/// @returns
+/// The component-wise minimum, which equals x AND y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    FfxFloat16x3 bothSet = min(x, y);
+    return bothSet;
+}
+
+/// Branch-free logical AND of two 4-component half-precision vectors, evaluated as min(x, y).
+///
+/// @param [in] x Left operand of the AND.
+/// @param [in] y Right operand of the AND.
+///
+/// @returns
+/// The component-wise minimum, which equals x AND y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    FfxFloat16x4 bothSet = min(x, y);
+    return bothSet;
+}
+
+/// Branch-free negated AND of two half-precision values, evaluated as 1 - x * y.
+///
+/// @param [in] x Left operand.
+/// @param [in] y Right operand.
+///
+/// @returns
+/// 1 - x * y, which is NOT (x AND y) when both operands are 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return FFXM_BROADCAST_FLOAT16(1.0) + (-x) * y;
+}
+
+/// Branch-free negated AND of two 2-component half-precision vectors, evaluated as 1 - x * y.
+///
+/// @param [in] x Left operand.
+/// @param [in] y Right operand.
+///
+/// @returns
+/// 1 - x * y per component, which is NOT (x AND y) when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return FFXM_BROADCAST_FLOAT16X2(1.0) + (-x) * y;
+}
+
+/// Branch-free negated AND of two 3-component half-precision vectors, evaluated as 1 - x * y.
+///
+/// @param [in] x Left operand.
+/// @param [in] y Right operand.
+///
+/// @returns
+/// 1 - x * y per component, which is NOT (x AND y) when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return FFXM_BROADCAST_FLOAT16X3(1.0) + (-x) * y;
+}
+
+/// Branch-free negated AND of two 4-component half-precision vectors, evaluated as 1 - x * y.
+///
+/// @param [in] x Left operand.
+/// @param [in] y Right operand.
+///
+/// @returns
+/// 1 - x * y per component, which is NOT (x AND y) when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return FFXM_BROADCAST_FLOAT16X4(1.0) + (-x) * y;
+}
+
+/// Branch-free AND of two half-precision values followed by an OR with a third,
+/// evaluated as the saturated value of x * y + z.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value OR-ed with the result of the AND.
+///
+/// @returns
+/// ffxSaturate(x * y + z), i.e. (x AND y) OR z when all operands are 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return ffxSaturate(z + x * y);
+}
+
+/// Branch-free AND of two 2-component half-precision vectors followed by an OR with a third,
+/// evaluated as the saturated value of x * y + z.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value OR-ed with the result of the AND.
+///
+/// @returns
+/// ffxSaturate(x * y + z), i.e. (x AND y) OR z when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return ffxSaturate(z + x * y);
+}
+
+/// Branch-free AND of two 3-component half-precision vectors followed by an OR with a third,
+/// evaluated as the saturated value of x * y + z.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value OR-ed with the result of the AND.
+///
+/// @returns
+/// ffxSaturate(x * y + z), i.e. (x AND y) OR z when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return ffxSaturate(z + x * y);
+}
+
+/// Branch-free AND of two 4-component half-precision vectors followed by an OR with a third,
+/// evaluated as the saturated value of x * y + z.
+///
+/// @param [in] x The first value to be fed into the AND operator.
+/// @param [in] y The second value to be fed into the AND operator.
+/// @param [in] z The value OR-ed with the result of the AND.
+///
+/// @returns
+/// ffxSaturate(x * y + z), i.e. (x AND y) OR z when every component is 0.0 or 1.0.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return ffxSaturate(z + x * y);
+}
+
+/// Branch-free test for a strictly positive half-precision value.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 if x is greater than zero and 0.0 if not, obtained by saturating x scaled by positive infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16(FFXM_POSITIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for strictly positive values in a 2-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is greater than zero and 0.0 elsewhere, obtained by saturating x scaled by positive infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X2(FFXM_POSITIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for strictly positive values in a 3-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is greater than zero and 0.0 elsewhere, obtained by saturating x scaled by positive infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X3(FFXM_POSITIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for strictly positive values in a 4-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is greater than zero and 0.0 elsewhere, obtained by saturating x scaled by positive infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X4(FFXM_POSITIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free logical NOT of a half-precision value, evaluated as 1 - x.
+///
+/// @param [in] x The value to invert.
+///
+/// @returns
+/// 1 - x, which is NOT x when x is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
+{
+    FfxFloat16 inverted = FFXM_BROADCAST_FLOAT16(1.0) - x;
+    return inverted;
+}
+
+/// Branch-free logical NOT of a 2-component half-precision vector, evaluated as 1 - x.
+///
+/// @param [in] x The value to invert.
+///
+/// @returns
+/// 1 - x per component, which is NOT x when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
+{
+    FfxFloat16x2 inverted = FFXM_BROADCAST_FLOAT16X2(1.0) - x;
+    return inverted;
+}
+
+/// Branch-free logical NOT of a 3-component half-precision vector, evaluated as 1 - x.
+///
+/// @param [in] x The value to invert.
+///
+/// @returns
+/// 1 - x per component, which is NOT x when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
+{
+    FfxFloat16x3 inverted = FFXM_BROADCAST_FLOAT16X3(1.0) - x;
+    return inverted;
+}
+
+/// Branch-free logical NOT of a 4-component half-precision vector, evaluated as 1 - x.
+///
+/// @param [in] x The value to invert.
+///
+/// @returns
+/// 1 - x per component, which is NOT x when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
+{
+    FfxFloat16x4 inverted = FFXM_BROADCAST_FLOAT16X4(1.0) - x;
+    return inverted;
+}
+
+/// Branch-free logical OR of two half-precision values, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The larger operand, which equals x OR y when both operands are 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    FfxFloat16 eitherSet = max(x, y);
+    return eitherSet;
+}
+
+/// Branch-free logical OR of two 2-component half-precision vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    FfxFloat16x2 eitherSet = max(x, y);
+    return eitherSet;
+}
+
+/// Branch-free logical OR of two 3-component half-precision vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    FfxFloat16x3 eitherSet = max(x, y);
+    return eitherSet;
+}
+
+/// Branch-free logical OR of two 4-component half-precision vectors, evaluated as max(x, y).
+///
+/// @param [in] x Left operand of the OR.
+/// @param [in] y Right operand of the OR.
+///
+/// @returns
+/// The component-wise maximum, which equals x OR y when every component is 0.0 or 1.0.
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    FfxFloat16x4 eitherSet = max(x, y);
+    return eitherSet;
+}
+
+/// Branch-free select between two half-precision values, driven by a selector of 0.0 or 1.0.
+///
+/// @param [in] x The selector, expected to be 0.0 or 1.0.
+/// @param [in] y The value returned when x is 1.0.
+/// @param [in] z The value returned when x is 0.0.
+///
+/// @returns
+/// x * y + (z - x * z); selector values other than 0.0 and 1.0 blend y and z.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    // The z term is z scaled by (1 - x), so x == 1 leaves only y and x == 0 leaves only z.
+    return x * y + ((-x) * z + z);
+}
+
+/// Branch-free per-component select between two 2-component half-precision vectors.
+///
+/// @param [in] x The selector, with every component expected to be 0.0 or 1.0.
+/// @param [in] y The value returned where x is 1.0.
+/// @param [in] z The value returned where x is 0.0.
+///
+/// @returns
+/// x * y + (z - x * z); selector values other than 0.0 and 1.0 blend y and z.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    // The z term is z scaled by (1 - x), so x == 1 leaves only y and x == 0 leaves only z.
+    return x * y + ((-x) * z + z);
+}
+
+/// Branch-free per-component select between two 3-component half-precision vectors.
+///
+/// @param [in] x The selector, with every component expected to be 0.0 or 1.0.
+/// @param [in] y The value returned where x is 1.0.
+/// @param [in] z The value returned where x is 0.0.
+///
+/// @returns
+/// x * y + (z - x * z); selector values other than 0.0 and 1.0 blend y and z.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    // The z term is z scaled by (1 - x), so x == 1 leaves only y and x == 0 leaves only z.
+    return x * y + ((-x) * z + z);
+}
+
+/// Branch-free per-component select between two 4-component half-precision vectors.
+///
+/// @param [in] x The selector, with every component expected to be 0.0 or 1.0.
+/// @param [in] y The value returned where x is 1.0.
+/// @param [in] z The value returned where x is 0.0.
+///
+/// @returns
+/// x * y + (z - x * z); selector values other than 0.0 and 1.0 blend y and z.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    // The z term is z scaled by (1 - x), so x == 1 leaves only y and x == 0 leaves only z.
+    return x * y + ((-x) * z + z);
+}
+
+/// Branch-free test for a negative half-precision value.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 if x is less than zero and 0.0 if not, obtained by saturating x scaled by negative infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16(FFXM_NEGATIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for negative values in a 2-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is less than zero and 0.0 elsewhere, obtained by saturating x scaled by negative infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X2(FFXM_NEGATIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for negative values in a 3-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is less than zero and 0.0 elsewhere, obtained by saturating x scaled by negative infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X3(FFXM_NEGATIVE_INFINITY_HALF) * x);
+}
+
+/// Branch-free per-component test for negative values in a 4-component half-precision vector.
+///
+/// @param [in] x The value to be compared against zero.
+///
+/// @returns
+/// 1.0 where x is less than zero and 0.0 elsewhere, obtained by saturating x scaled by negative infinity.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
+{
+    return ffxSaturate(FFXM_BROADCAST_FLOAT16X4(FFXM_NEGATIVE_INFINITY_HALF) * x);
+}
+
+/// Encode a linear value with the Rec.709 transfer curve (Rec.709 is used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the encoded threshold, the linear segment and the power segment, spelled
+/// out with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+///
+/// @param [in] c The linear value to encode.
+/// @returns
+/// The color in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    FfxFloat16 linearSegment = c * j.y;
+    FfxFloat16 powerSegment = pow(c, j.z) * k.x + k.y;
+    return max(min(j.x, linearSegment), min(max(j.x, linearSegment), powerSegment));
+}
+
+/// Encode a 2-component linear value with the Rec.709 transfer curve (Rec.709 is used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the encoded threshold, the linear segment and the power segment, spelled
+/// out with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+///
+/// @param [in] c The linear value to encode.
+/// @returns
+/// The color in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    FfxFloat16x2 linearSegment = c * j.yy;
+    FfxFloat16x2 powerSegment = pow(c, j.zz) * k.xx + k.yy;
+    return max(min(j.xx, linearSegment), min(max(j.xx, linearSegment), powerSegment));
+}
+
+/// Encode a 3-component linear value with the Rec.709 transfer curve (Rec.709 is used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the encoded threshold, the linear segment and the power segment, spelled
+/// out with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+///
+/// @param [in] c The linear value to encode.
+/// @returns
+/// The color in Rec.709 space.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    FfxFloat16x3 linearSegment = c * j.yyy;
+    FfxFloat16x3 powerSegment = pow(c, j.zzz) * k.xxx + k.yyy;
+    return max(min(j.xxx, linearSegment), min(max(j.xxx, linearSegment), powerSegment));
+}
+
+/// Encode a linear value with a pure power-law gamma curve: pow(c, rcpX).
+///
+/// The gamma is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// 'rcpX' is '1/x', where 'x' is the power that would be passed to ffxLinearFromGammaHalf.
+///
+/// @param [in] c The linear value to encode.
+/// @param [in] rcpX The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
+{
+    FfxFloat16 encoded = pow(c, FFXM_BROADCAST_FLOAT16(rcpX));
+    return encoded;
+}
+
+/// Encode a 2-component linear value with a pure power-law gamma curve: pow(c, rcpX).
+///
+/// The gamma is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// 'rcpX' is '1/x', where 'x' is the power that would be passed to ffxLinearFromGammaHalf.
+///
+/// @param [in] c The linear value to encode.
+/// @param [in] rcpX The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
+{
+    FfxFloat16x2 encoded = pow(c, FFXM_BROADCAST_FLOAT16X2(rcpX));
+    return encoded;
+}
+
+/// Encode a 3-component linear value with a pure power-law gamma curve: pow(c, rcpX).
+///
+/// The gamma is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+/// 'rcpX' is '1/x', where 'x' is the power that would be passed to ffxLinearFromGammaHalf.
+///
+/// @param [in] c The linear value to encode.
+/// @param [in] rcpX The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
+{
+    FfxFloat16x3 encoded = pow(c, FFXM_BROADCAST_FLOAT16X3(rcpX));
+    return encoded;
+}
+
+/// Encode a linear value with the sRGB transfer curve.
+/// The result is the median of the encoded threshold, the linear segment and the power segment; it is
+/// written with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+/// @param [in] c The value to convert to SRGB from linear.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    FfxFloat16 linearSegment = c * j.y;
+    FfxFloat16 powerSegment = pow(c, j.z) * k.x + k.y;
+    return max(min(j.x, linearSegment), min(max(j.x, linearSegment), powerSegment));
+}
+
+/// Encode a 2-component linear value with the sRGB transfer curve.
+/// The result is the median of the encoded threshold, the linear segment and the power segment; it is
+/// written with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+/// @param [in] c The value to convert to SRGB from linear.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    FfxFloat16x2 linearSegment = c * j.yy;
+    FfxFloat16x2 powerSegment = pow(c, j.zz) * k.xx + k.yy;
+    return max(min(j.xx, linearSegment), min(max(j.xx, linearSegment), powerSegment));
+}
+
+/// Encode a 3-component linear value with the sRGB transfer curve.
+/// The result is the median of the encoded threshold, the linear segment and the power segment; it is
+/// written with min/max because clamp() with a lower bound above its upper bound is not a reliable median.
+/// @param [in] c The value to convert to SRGB from linear.
+/// @returns A value in SRGB space.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    FfxFloat16x3 linearSegment = c * j.yyy;
+    FfxFloat16x3 powerSegment = pow(c, j.zzz) * k.xxx + k.yyy;
+    return max(min(j.xxx, linearSegment), min(max(j.xxx, linearSegment), powerSegment));
+}
+
+/// Square root of a half-precision value, forwarded to the sqrt intrinsic.
+///
+/// @param [in] c The value to compute the square root for.
+///
+/// @returns
+/// sqrt(c).
+/// @ingroup GPUCore
+FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
+{
+    FfxFloat16 root = sqrt(c);
+    return root;
+}
+
+/// Component-wise square root of a 2-component half-precision vector, forwarded to the sqrt intrinsic.
+///
+/// @param [in] c The value to compute the square root for.
+///
+/// @returns
+/// sqrt(c).
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x2 root = sqrt(c);
+    return root;
+}
+
+/// Component-wise square root of a 3-component half-precision vector, forwarded to the sqrt intrinsic.
+///
+/// @param [in] c The value to compute the square root for.
+///
+/// @returns
+/// sqrt(c).
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 root = sqrt(c);
+    return root;
+}
+
+/// Cube root of a half-precision value, evaluated as pow(c, 1/3).
+///
+/// @param [in] c The value to compute the cube root for.
+///
+/// @returns
+/// pow(c, 1/3).
+/// @ingroup GPUCore
+FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
+{
+    FfxFloat16 root = pow(c, FFXM_BROADCAST_FLOAT16(1.0 / 3.0));
+    return root;
+}
+
+/// Component-wise cube root of a 2-component half-precision vector, evaluated as pow(c, 1/3).
+///
+/// @param [in] c The value to compute the cube root for.
+///
+/// @returns
+/// pow(c, 1/3).
+/// @ingroup GPUCore
+FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x2 root = pow(c, FFXM_BROADCAST_FLOAT16X2(1.0 / 3.0));
+    return root;
+}
+
+/// Component-wise cube root of a 3-component half-precision vector, evaluated as pow(c, 1/3).
+///
+/// @param [in] c The value to compute the cube root for.
+///
+/// @returns
+/// pow(c, 1/3).
+/// @ingroup GPUCore
+FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 root = pow(c, FFXM_BROADCAST_FLOAT16X3(1.0 / 3.0));
+    return root;
+}
+
+/// Decode a REC.709-encoded value back to linear.
+///
+/// @param [in] c The REC.709-encoded value. Inputs below the encoded breakpoint 0.081 take the
+/// linear segment (c / 4.5); all other inputs take the power segment.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
+}
+
+/// Decode a 2-component REC.709-encoded value back to linear.
+///
+/// @param [in] c The REC.709-encoded value. Components below the encoded breakpoint 0.081 take the
+/// linear segment (c / 4.5); all other components take the power segment.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
+}
+
+/// Decode a 3-component REC.709-encoded value back to linear.
+///
+/// @param [in] c The REC.709-encoded value. Components below the encoded breakpoint 0.081 take the
+/// linear segment (c / 4.5); all other components take the power segment.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.081, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
+}
+
+/// Decode a gamma-space value back to linear with a pure power law: pow(c, x).
+///
+/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c The gamma-space value to convert to linear.
+/// @param [in] x The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
+{
+    FfxFloat16 decoded = pow(c, FFXM_BROADCAST_FLOAT16(x));
+    return decoded;
+}
+
+/// Decode a 2-component gamma-space value back to linear with a pure power law: pow(c, x).
+///
+/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c The gamma-space value to convert to linear.
+/// @param [in] x The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
+{
+    FfxFloat16x2 decoded = pow(c, FFXM_BROADCAST_FLOAT16X2(x));
+    return decoded;
+}
+
+/// Decode a 3-component gamma-space value back to linear with a pure power law: pow(c, x).
+///
+/// The power is typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c The gamma-space value to convert to linear.
+/// @param [in] x The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
+{
+    FfxFloat16x3 decoded = pow(c, FFXM_BROADCAST_FLOAT16X3(x));
+    return decoded;
+}
+
+/// Decode an SRGB-encoded value back to linear.
+///
+/// The segment is chosen by comparing the encoded input against the encoded breakpoint 0.04045:
+/// inputs below it take the linear segment (c / 12.92), all others take the power segment.
+///
+/// @param [in] c The value to convert to linear in SRGB space.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
+}
+
+/// Decode a 2-component SRGB-encoded value back to linear.
+///
+/// The segment is chosen per component by comparing the encoded input against the encoded breakpoint
+/// 0.04045: inputs below it take the linear segment (c / 12.92), all others take the power segment.
+///
+/// @param [in] c The value to convert to linear in SRGB space.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz));
+}
+
+/// Decode a 3-component SRGB-encoded value back to linear.
+///
+/// The segment is chosen per component by comparing the encoded input against the encoded breakpoint
+/// 0.04045: inputs below it take the linear segment (c / 12.92), all others take the power segment.
+///
+/// @param [in] c The value to convert to linear in SRGB space.
+///
+/// @returns
+/// A value in linear space.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz));
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+/// Bits of 'a' that feed each output coordinate (bit 5 on the left, bit 0 on the right):
+/// 543210
+/// ======
+/// ..xxx.
+/// yy...y
+///
+/// @param [in] a The input 1D coordinates to remap.
+/// @returns
+/// The remapped 2D coordinates, x in the first component and y in the second.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
+{
+ return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+}
+
+/// A helper function performing the 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices (shown in hexadecimal) are placed in the 8x8 grid as follows:
+///
+/// 00 01 08 09 10 11 18 19
+/// 02 03 0a 0b 12 13 1a 1b
+/// 04 05 0c 0d 14 15 1c 1d
+/// 06 07 0e 0f 16 17 1e 1f
+/// 20 21 28 29 30 31 38 39
+/// 22 23 2a 2b 32 33 3a 3b
+/// 24 25 2c 2d 34 35 3c 3d
+/// 26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates, x in the first component and y in the second.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a)
+{
+ return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+}
+
+#endif // FFXM_HALF
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta
new file mode 100644
index 0000000..7b4903e
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_gpu_common_half.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: b5e484d04abc3c84788c93d9a2e50b7f
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h
new file mode 100644
index 0000000..e77bb1d
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h
@@ -0,0 +1,1643 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// @defgroup HLSLCore HLSL Core
+/// HLSL core defines and functions
+///
+/// @ingroup FfxHLSL
+
+/// A define for abstracting shared memory between shading languages.
+/// Expands to the HLSL groupshared storage qualifier.
+/// @ingroup HLSLCore
+#define FFXM_GROUPSHARED groupshared
+
+/// A define for abstracting compute memory barriers between shading languages.
+/// Expands to a GroupMemoryBarrierWithGroupSync() call; the use site supplies the trailing semicolon.
+/// @ingroup HLSLCore
+#define FFXM_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync()
+
+/// A define for abstracting compute atomic additions between shading languages.
+/// Expands to the two-argument InterlockedAdd(x, y) form, so no previous value is handed back.
+/// @ingroup HLSLCore
+#define FFXM_ATOMIC_ADD(x, y) InterlockedAdd(x, y)
+
+/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
+/// Expands to the HLSL static keyword.
+/// @ingroup HLSLCore
+#define FFXM_STATIC static
+
+/// A define for abstracting loop unrolling between shading languages.
+/// Expands to the HLSL [unroll] loop attribute.
+/// @ingroup HLSLCore
+#define FFXM_UNROLL [unroll]
+
+/// A define for abstracting a 'greater than' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_GREATER_THAN(x, y) ((x) > (y))
+
+/// A define for abstracting a 'greater than or equal' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_GREATER_THAN_EQUAL(x, y) ((x) >= (y))
+
+/// A define for abstracting a 'less than' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_LESS_THAN(x, y) ((x) < (y))
+
+/// A define for abstracting a 'less than or equal' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_LESS_THAN_EQUAL(x, y) ((x) <= (y))
+
+/// A define for abstracting an 'equal' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_EQUAL(x, y) ((x) == (y))
+
+/// A define for abstracting a 'not equal' comparison operator between two types.
+/// Operands and result are parenthesized so the expansion keeps its meaning inside larger expressions.
+/// @ingroup HLSLCore
+#define FFXM_NOT_EQUAL(x, y) ((x) != (y))
+
+/// Abstraction of matrix multiply operations between shading languages.
+/// Forwards both arguments, in order, to the HLSL mul intrinsic.
+/// @ingroup HLSLCore
+#define FFXM_MATRIX_MULTIPLY(lhs, rhs) mul(lhs, rhs)
+
+/// Abstraction of vector transformations between shading languages.
+/// Forwards both arguments, in order, to the HLSL mul intrinsic.
+/// @ingroup HLSLCore
+#define FFXM_TRANSFORM_VECTOR(lhs, rhs) mul(lhs, rhs)
+
+/// Abstraction of modulo operations between shading languages.
+/// Forwards both arguments to the HLSL fmod intrinsic; the call is wrapped in parentheses.
+/// @ingroup HLSLCore
+#define FFXM_MODULO(num, den) (fmod(num, den))
+
+/// Broadcast a scalar value to a 1-dimensional floating point vector.
+/// Expands to an FfxFloat32 conversion of the argument.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_FLOAT32(value) FfxFloat32(value)
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector.
+/// Expands to a scalar FfxFloat32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_FLOAT32X2(value) FfxFloat32(value)
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector.
+/// Expands to a scalar FfxFloat32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_FLOAT32X3(value) FfxFloat32(value)
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector.
+/// Expands to a scalar FfxFloat32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_FLOAT32X4(value) FfxFloat32(value)
+
+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
+/// Expands to an FfxUInt32 conversion of the argument.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_UINT32(value) FfxUInt32(value)
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
+/// Expands to a scalar FfxUInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_UINT32X2(value) FfxUInt32(value)
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
+/// Expands to a scalar FfxUInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_UINT32X3(value) FfxUInt32(value)
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
+/// Expands to a scalar FfxUInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_UINT32X4(value) FfxUInt32(value)
+
+/// Broadcast a scalar value to a 1-dimensional signed integer vector.
+/// Expands to an FfxInt32 conversion of the argument.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_INT32(value) FfxInt32(value)
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
+/// Expands to a scalar FfxInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_INT32X2(value) FfxInt32(value)
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
+/// Expands to a scalar FfxInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_INT32X3(value) FfxInt32(value)
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
+/// Expands to a scalar FfxInt32 conversion; presumably HLSL's implicit scalar-to-vector promotion widens it at the use site.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_INT32X4(value) FfxInt32(value)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
+/// Forwards the argument to FFXM_MIN16_F, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_FLOAT16(value) FFXM_MIN16_F(value)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
+/// Forwards the argument to FFXM_MIN16_F, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_FLOAT16X2(value) FFXM_MIN16_F(value)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
+/// Forwards the argument to FFXM_MIN16_F, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_FLOAT16X3(value) FFXM_MIN16_F(value)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
+/// Forwards the argument to FFXM_MIN16_F, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_FLOAT16X4(value) FFXM_MIN16_F(value)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
+/// Forwards the argument to FFXM_MIN16_U, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_UINT16(value) FFXM_MIN16_U(value)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
+/// Forwards the argument to FFXM_MIN16_U, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_UINT16X2(value) FFXM_MIN16_U(value)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
+/// Forwards the argument to FFXM_MIN16_U, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_UINT16X3(value) FFXM_MIN16_U(value)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
+/// Forwards the argument to FFXM_MIN16_U, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_UINT16X4(value) FFXM_MIN16_U(value)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
+/// Forwards the argument to FFXM_MIN16_I, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_INT16(value) FFXM_MIN16_I(value)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
+/// Forwards the argument to FFXM_MIN16_I, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_INT16X2(value) FFXM_MIN16_I(value)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
+/// Forwards the argument to FFXM_MIN16_I, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_INT16X3(value) FFXM_MIN16_I(value)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
+/// Forwards the argument to FFXM_MIN16_I, which is defined outside this section.
+/// @ingroup HLSLCore
+#define FFXM_BROADCAST_MIN_INT16X4(value) FFXM_MIN16_I(value)
+
+/// Pack two 32-bit floating point values into a single 32-bit value as two 16-bit floats.
+///
+/// Each component is converted with the HLSL f32tof16 intrinsic; the X result occupies the
+/// lower 16 bits and the Y result the upper 16 bits of the returned unsigned integer.
+///
+/// @param [in] value A 2-dimensional floating point value to convert and pack.
+/// @returns
+/// A packed 32-bit value containing two 16-bit floating point values.
+///
+/// @ingroup HLSLCore
+FfxUInt32 packHalf2x16(FfxFloat32x2 value)
+{
+    const FfxUInt32 lowHalf = f32tof16(value.x);
+    const FfxUInt32 highHalf = f32tof16(value.y) << 16;
+    return lowHalf | highHalf;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional floating point vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 2-dimensional floating point vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
+{
+    const FfxFloat32x2 splat = FfxFloat32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 3-dimensional floating point vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 3-dimensional floating point vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
+{
+    const FfxFloat32x3 splat = FfxFloat32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 4-dimensional floating point vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 4-dimensional floating point vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
+{
+    const FfxFloat32x4 splat = FfxFloat32x4(value, value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional signed integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 2-dimensional signed integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
+{
+    const FfxInt32x2 splat = FfxInt32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 3-dimensional signed integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 3-dimensional signed integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
+{
+    // Stay in the signed type so negative inputs are not turned into large unsigned values.
+    return FfxInt32x3(value, value, value);
+}
+
+/// Replicate a scalar into every component of a 4-dimensional signed integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 4-dimensional signed integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
+{
+    const FfxInt32x4 splat = FfxInt32x4(value, value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional unsigned integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 2-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
+{
+    const FfxUInt32x2 splat = FfxUInt32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 3-dimensional unsigned integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 3-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
+{
+    const FfxUInt32x3 splat = FfxUInt32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 4-dimensional unsigned integer vector.
+///
+/// @param [in] value The value to broadcast.
+/// @returns
+/// A 4-dimensional unsigned integer vector with value in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
+{
+    const FfxUInt32x4 splat = FfxUInt32x4(value, value, value, value);
+    return splat;
+}
+
+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) // Returns the 'bits'-wide field of src that starts at bit 'off'.
+{
+    const FfxUInt32 mask = (bits >= 32u) ? 0xFFFFFFFFu : ((1u << bits) - 1u); // a shift by 32 cannot build the full-width mask
+    return (src >> off) & mask;
+}
+
+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) // Bits set in mask are taken from ins, all other bits from src.
+{
+    return (src & ~mask) | (mask & ins);
+}
+
+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) // Low 'bits' bits are taken from ins, all higher bits from src.
+{
+    const FfxUInt32 mask = (bits >= 32u) ? 0xFFFFFFFFu : ((1u << bits) - 1u); // a shift by 32 cannot build the full-width mask
+    return (ins & mask) | (src & (~mask));
+}
+
+/// Reinterpret the bit pattern of x as an unsigned integer.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as an unsigned integer by the HLSL asuint intrinsic.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    const FfxUInt32 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of each component of x as an unsigned integer.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as unsigned integers by the HLSL asuint intrinsic.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
+{
+    const FfxUInt32x2 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of each component of x as an unsigned integer.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as unsigned integers by the HLSL asuint intrinsic.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
+{
+    const FfxUInt32x3 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of each component of x as an unsigned integer.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as unsigned integers by the HLSL asuint intrinsic.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
+{
+    const FfxUInt32x4 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of x as a floating-point number.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as a floating-point number by the HLSL asfloat intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxAsFloat(FfxUInt32 x)
+{
+    const FfxFloat32 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of each component of x as a floating-point number.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as floating-point numbers by the HLSL asfloat intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
+{
+    const FfxFloat32x2 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of each component of x as a floating-point number.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as floating-point numbers by the HLSL asfloat intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
+{
+    const FfxFloat32x3 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of each component of x as a floating-point number.
+///
+/// @param [in] x The input value.
+/// @returns
+/// The bits of x reinterpreted as floating-point numbers by the HLSL asfloat intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
+{
+    const FfxFloat32x4 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    const FfxFloat32 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, using one scalar weight for all components.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
+{
+    const FfxFloat32x2 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, with a separate weight per component.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
+{
+    const FfxFloat32x2 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, using one scalar weight for all components.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
+{
+    const FfxFloat32x3 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, with a separate weight per component.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
+{
+    const FfxFloat32x3 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, using one scalar weight for all components.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
+{
+    const FfxFloat32x4 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values, with a separate weight per component.
+///
+/// Implemented by calling the HLSL lerp intrinsic function, which is mathematically:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x The first value to lerp between.
+/// @param [in] y The second value to lerp between.
+/// @param [in] t The value to determine how much of x and how much of y.
+///
+/// @returns
+/// A linearly interpolated value between x and y according to t.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
+{
+    const FfxFloat32x4 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Clamp a value to the [0..1] range.
+///
+/// @param [in] x The value to clamp to the [0..1] range.
+/// @returns
+/// The clamped version of x, as produced by the HLSL saturate intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    const FfxFloat32 clamped = saturate(x);
+    return clamped;
+}
+
+/// Clamp each component of a value to the [0..1] range.
+///
+/// @param [in] x The value to clamp to the [0..1] range.
+/// @returns
+/// The clamped version of x, as produced by the HLSL saturate intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
+{
+    const FfxFloat32x2 clamped = saturate(x);
+    return clamped;
+}
+
+/// Clamp each component of a value to the [0..1] range.
+///
+/// @param [in] x The value to clamp to the [0..1] range.
+/// @returns
+/// The clamped version of x, as produced by the HLSL saturate intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
+{
+    const FfxFloat32x3 clamped = saturate(x);
+    return clamped;
+}
+
+/// Clamp each component of a value to the [0..1] range.
+///
+/// @param [in] x The value to clamp to the [0..1] range.
+/// @returns
+/// The clamped version of x, as produced by the HLSL saturate intrinsic.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
+{
+    const FfxFloat32x4 clamped = saturate(x);
+    return clamped;
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates x - floor(x), where floor is the intrinsic HLSL function.
+///
+/// NOTE(review): presumably meant to compile down to a single V_FRACT_F32 operation on GCN/RDNA
+/// hardware (TODO confirm). It is worth further noting that this function is intentionally
+/// distinct from the HLSL frac intrinsic function.
+///
+/// @param [in] x The value to compute the fractional part from.
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    const FfxFloat32 wholePart = floor(x);
+    return x - wholePart;
+}
+
+/// Compute the fractional part of each component of a decimal value.
+///
+/// This function calculates x - floor(x), where floor is the intrinsic HLSL function.
+///
+/// NOTE(review): presumably meant to compile down to a single V_FRACT_F32 operation on GCN/RDNA
+/// hardware (TODO confirm). It is worth further noting that this function is intentionally
+/// distinct from the HLSL frac intrinsic function.
+///
+/// @param [in] x The value to compute the fractional part from.
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
+{
+    const FfxFloat32x2 wholePart = floor(x);
+    return x - wholePart;
+}
+
+/// Compute the fractional part of each component of a decimal value.
+///
+/// This function calculates x - floor(x), where floor is the intrinsic HLSL function.
+///
+/// NOTE(review): presumably meant to compile down to a single V_FRACT_F32 operation on GCN/RDNA
+/// hardware (TODO confirm). It is worth further noting that this function is intentionally
+/// distinct from the HLSL frac intrinsic function.
+///
+/// @param [in] x The value to compute the fractional part from.
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
+{
+    const FfxFloat32x3 wholePart = floor(x);
+    return x - wholePart;
+}
+
+/// Compute the fractional part of each component of a decimal value.
+///
+/// This function calculates x - floor(x), where floor is the intrinsic HLSL function.
+///
+/// NOTE(review): presumably meant to compile down to a single V_FRACT_F32 operation on GCN/RDNA
+/// hardware (TODO confirm). It is worth further noting that this function is intentionally
+/// distinct from the HLSL frac intrinsic function.
+///
+/// @param [in] x The value to compute the fractional part from.
+/// @returns
+/// The fractional part of x.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
+{
+    const FfxFloat32x4 wholePart = floor(x);
+    return x - wholePart;
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    const FfxFloat32 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    const FfxFloat32x2 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    const FfxFloat32x3 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    const FfxFloat32x4 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    const FfxUInt32 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    const FfxUInt32x2 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    const FfxUInt32x3 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single V_MAX3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the max calculation.
+/// @param [in] y The second value to include in the max calculation.
+/// @param [in] z The third value to include in the max calculation.
+/// @returns
+/// The maximum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    const FfxUInt32x4 largerOfYZ = max(y, z);
+    return max(x, largerOfYZ);
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    const FfxFloat32 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    const FfxFloat32x2 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    const FfxFloat32x3 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    const FfxFloat32x4 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the median of three signed integer values.
+///
+/// Evaluated as max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
+{
+    const FfxInt32 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// Evaluated as max(min(x, y), min(max(x, y), z)).
+///
+/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
+{
+    const FfxInt32x2 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
+{
+    const FfxInt32x3 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the median calculation.
+/// @param [in] y The second value to include in the median calculation.
+/// @param [in] z The third value to include in the median calculation.
+/// @returns
+/// The median value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
+{
+    const FfxInt32x4 upperCap = min(max(x, y), z);
+    return max(min(x, y), upperCap);
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    const FfxFloat32 smallerOfYZ = min(y, z);
+    return min(x, smallerOfYZ);
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single V_MIN3_U32 operation on GCN/RDNA hardware.
+///
+/// @param [in] x The first value to include in the min calculation.
+/// @param [in] y The second value to include in the min calculation.
+/// @param [in] z The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of x, y, and z.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+ return min(x, min(y, z));
+}
+
+/// Right-shifts a by b with both operands cast to FfxInt32 first (shift on signed operands), then casts back to FfxUInt32.
+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+{
+ return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
+}
+
+FfxUInt32 ffxPackF32(FfxFloat32x2 v){
+ FfxUInt32 lo = f32tof16(v.x), hi = f32tof16(v.y); // each component converted with f32tof16
+ return (hi << 16) | lo; // v.y goes to the upper 16 bits, v.x stays in the lower 16
+}
+
+FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ // .x is decoded from the low 16 bits of a, .y from the high 16 bits
+ return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16));
+}
+
+//==============================================================================================================================
+// HLSL HALF
+//==============================================================================================================================
+//==============================================================================================================================
+// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
+FFXM_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
+{ // low 16 bits of x -> .x, high 16 bits -> .y; decoded via f16tof32, then cast to FFXM_MIN16_F2
+ FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
+ return FFXM_MIN16_F2(t);
+}
+FFXM_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
+{ // x.x supplies the first two components, x.y the last two
+ return FFXM_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y));
+}
+FFXM_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
+{ // integer split only (no float decode): low 16 bits -> .x, high 16 bits -> .y
+ FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16);
+ return FFXM_MIN16_U2(t);
+}
+FFXM_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
+{ // x.x supplies the first two components, x.y the last two
+ return FFXM_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v, dividing by v itself with no rounding of the divisor.
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){
+ FfxFloat32 s = sign(v);
+ FfxFloat32 s2 = s*s; // 1 when v != 0, 0 when v == 0
+ return s2/(v + (s2 - 1.0)); // (s2 - 1) is exactly 0 or -1; grouping it keeps tiny |v| from being rounded away against 1.0
+}
+
+/// @brief Inverts each component while avoiding division by zero. A zero component yields zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v, with no rounding of the divisor.
+FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){
+ FfxFloat32x2 s = sign(v);
+ FfxFloat32x2 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); // (s2 - 1) is exactly 0 or -1; grouping it keeps tiny |v| from being rounded away
+}
+
+/// @brief Inverts each component while avoiding division by zero. A zero component yields zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v, with no rounding of the divisor.
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){
+ FfxFloat32x3 s = sign(v);
+ FfxFloat32x3 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); // (s2 - 1) is exactly 0 or -1; grouping it keeps tiny |v| from being rounded away
+}
+
+/// @brief Inverts each component while avoiding division by zero. A zero component yields zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0 (computed as 0 / -1). If v != 0 returns 1/v, with no rounding of the divisor.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){
+ FfxFloat32x4 s = sign(v);
+ FfxFloat32x4 s2 = s*s; // 1 where v != 0, 0 where v == 0
+ return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); // (s2 - 1) is exactly 0 or -1; grouping it keeps tiny |v| from being rounded away
+}
+
+#define FFXM_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))
+#if FFXM_HALF
+// The remaining packed 16-bit conversion macros and helpers are only declared when FFXM_HALF is enabled.
+#define FFXM_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
+#define FFXM_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
+#define FFXM_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
+
+FfxUInt32 ffxPackF16(FfxFloat16x2 v){
+ FfxFloat32x2 wide = FfxFloat32x2(v); // widen once; both components are then converted with f32tof16
+ return (f32tof16(wide.y) << 16) | f32tof16(wide.x); // v.y in the upper 16 bits, v.x in the lower 16
+}
+
+FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ // .x is decoded from the low 16 bits of a, .y from the high 16 bits
+ return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)));
+}
+
+//------------------------------------------------------------------------------------------------------------------------------
+FfxUInt32 FFXM_MIN16_F2ToUint32(FFXM_MIN16_F2 x)
+{ // x.x encoded via f32tof16 in the low bits, x.y encoded and shifted into the high 16 bits; combined with +
+ return f32tof16(x.x) + (f32tof16(x.y) << 16);
+}
+FfxUInt32x2 FFXM_MIN16_F4ToUint32x2(FFXM_MIN16_F4 x)
+{ // x.xy packed into the first uint, x.zw into the second
+ return FfxUInt32x2(FFXM_MIN16_F2ToUint32(x.xy), FFXM_MIN16_F2ToUint32(x.zw));
+}
+FfxUInt32 FFXM_MIN16_U2ToUint32(FFXM_MIN16_U2 x)
+{ // integer pack: x.x in the low bits, x.y shifted into the high 16 bits; combined with +
+ return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16);
+}
+FfxUInt32x2 FFXM_MIN16_U4ToUint32x2(FFXM_MIN16_U4 x)
+{ // x.xy packed into the first uint, x.zw into the second
+ return FfxUInt32x2(FFXM_MIN16_U2ToUint32(x.xy), FFXM_MIN16_U2ToUint32(x.zw));
+}
+#define FFXM_FLOAT16X2_TO_UINT32(x) FFXM_MIN16_F2ToUint32(FFXM_MIN16_F2(x))
+#define FFXM_FLOAT16X4_TO_UINT32X2(x) FFXM_MIN16_F4ToUint32x2(FFXM_MIN16_F4(x))
+#define FFXM_UINT16X2_TO_UINT32(x) FFXM_MIN16_U2ToUint32(FFXM_MIN16_U2(x))
+#define FFXM_UINT16X4_TO_UINT32X2(x) FFXM_MIN16_U4ToUint32x2(FFXM_MIN16_U4(x))
+
+#if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST)
+#define FFXM_TO_UINT16(x) asuint16(x)
+#define FFXM_TO_UINT16X2(x) asuint16(x)
+#define FFXM_TO_UINT16X3(x) asuint16(x)
+#define FFXM_TO_UINT16X4(x) asuint16(x)
+#else // fallback without asuint16: each component goes through f32tof16 individually
+#define FFXM_TO_UINT16(a) FFXM_MIN16_U(f32tof16(FfxFloat32(a)))
+#define FFXM_TO_UINT16X2(a) FFXM_MIN16_U2(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y))
+#define FFXM_TO_UINT16X3(a) FFXM_MIN16_U3(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y), FFXM_TO_UINT16((a).z))
+#define FFXM_TO_UINT16X4(a) FFXM_MIN16_U4(FFXM_TO_UINT16((a).x), FFXM_TO_UINT16((a).y), FFXM_TO_UINT16((a).z), FFXM_TO_UINT16((a).w))
+#endif // #if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST)
+
+#if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST)
+#define FFXM_TO_FLOAT16(x) asfloat16(x)
+#define FFXM_TO_FLOAT16X2(x) asfloat16(x)
+#define FFXM_TO_FLOAT16X3(x) asfloat16(x)
+#define FFXM_TO_FLOAT16X4(x) asfloat16(x)
+#else // fallback without asfloat16: each component goes through f16tof32 individually
+#define FFXM_TO_FLOAT16(a) FFXM_MIN16_F(f16tof32(FfxUInt32(a)))
+#define FFXM_TO_FLOAT16X2(a) FFXM_MIN16_F2(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y))
+#define FFXM_TO_FLOAT16X3(a) FFXM_MIN16_F3(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y), FFXM_TO_FLOAT16((a).z))
+#define FFXM_TO_FLOAT16X4(a) FFXM_MIN16_F4(FFXM_TO_FLOAT16((a).x), FFXM_TO_FLOAT16((a).y), FFXM_TO_FLOAT16((a).z), FFXM_TO_FLOAT16((a).w))
+#endif // #if (FFXM_HLSL_6_2) && !defined(FFXM_NO_16_BIT_CAST)
+
+//== Broadcast helpers: the X2/X3/X4 variants expand to the same scalar cast as the scalar one (presumably relying on scalar-to-vector promotion at the use site - confirm).
+#define FFXM_BROADCAST_FLOAT16(a) FFXM_MIN16_F(a)
+#define FFXM_BROADCAST_FLOAT16X2(a) FFXM_MIN16_F(a)
+#define FFXM_BROADCAST_FLOAT16X3(a) FFXM_MIN16_F(a)
+#define FFXM_BROADCAST_FLOAT16X4(a) FFXM_MIN16_F(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFXM_BROADCAST_INT16(a) FFXM_MIN16_I(a)
+#define FFXM_BROADCAST_INT16X2(a) FFXM_MIN16_I(a)
+#define FFXM_BROADCAST_INT16X3(a) FFXM_MIN16_I(a)
+#define FFXM_BROADCAST_INT16X4(a) FFXM_MIN16_I(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFXM_BROADCAST_UINT16(a) FFXM_MIN16_U(a)
+#define FFXM_BROADCAST_UINT16X2(a) FFXM_MIN16_U(a)
+#define FFXM_BROADCAST_UINT16X3(a) FFXM_MIN16_U(a)
+#define FFXM_BROADCAST_UINT16X4(a) FFXM_MIN16_U(a)
+
+//==============================================================================================================================
+FFXM_MIN16_U ffxAbsHalf(FFXM_MIN16_U a)
+{ // all overloads: cast a to the signed FFXM_MIN16_I type, take abs(), cast the result back to unsigned
+ return FFXM_MIN16_U(abs(FFXM_MIN16_I(a)));
+}
+FFXM_MIN16_U2 ffxAbsHalf(FFXM_MIN16_U2 a)
+{
+ return FFXM_MIN16_U2(abs(FFXM_MIN16_I2(a)));
+}
+FFXM_MIN16_U3 ffxAbsHalf(FFXM_MIN16_U3 a)
+{
+ return FFXM_MIN16_U3(abs(FFXM_MIN16_I3(a)));
+}
+FFXM_MIN16_U4 ffxAbsHalf(FFXM_MIN16_U4 a)
+{
+ return FFXM_MIN16_U4(abs(FFXM_MIN16_I4(a)));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxClampHalf(FFXM_MIN16_F x, FFXM_MIN16_F n, FFXM_MIN16_F m)
+{ // all overloads: x is capped at m first, then raised to at least n, so n wins if n > m
+ return max(n, min(x, m));
+}
+FFXM_MIN16_F2 ffxClampHalf(FFXM_MIN16_F2 x, FFXM_MIN16_F2 n, FFXM_MIN16_F2 m)
+{
+ return max(n, min(x, m));
+}
+FFXM_MIN16_F3 ffxClampHalf(FFXM_MIN16_F3 x, FFXM_MIN16_F3 n, FFXM_MIN16_F3 m)
+{
+ return max(n, min(x, m));
+}
+FFXM_MIN16_F4 ffxClampHalf(FFXM_MIN16_F4 x, FFXM_MIN16_F4 n, FFXM_MIN16_F4 m)
+{
+ return max(n, min(x, m));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// V_FRACT_F16 (note DX frac() is different). All overloads return x - floor(x), per component.
+FFXM_MIN16_F ffxFract(FFXM_MIN16_F x)
+{
+ return x - floor(x);
+}
+FFXM_MIN16_F2 ffxFract(FFXM_MIN16_F2 x)
+{
+ return x - floor(x);
+}
+FFXM_MIN16_F3 ffxFract(FFXM_MIN16_F3 x)
+{
+ return x - floor(x);
+}
+FFXM_MIN16_F4 ffxFract(FFXM_MIN16_F4 x)
+{
+ return x - floor(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxLerp(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F a)
+{ // all overloads forward directly to the lerp() intrinsic with arguments in the same order
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F2 ffxLerp(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F a)
+{ // scalar a: the same factor is passed for every component
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F2 ffxLerp(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 a)
+{
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F3 ffxLerp(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F a)
+{
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F3 ffxLerp(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 a)
+{
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F4 ffxLerp(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F a)
+{
+ return lerp(x, y, a);
+}
+FFXM_MIN16_F4 ffxLerp(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 a)
+{
+ return lerp(x, y, a);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxMax3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z)
+{ // all overloads: maximum of the three inputs via two nested max() calls, per component
+ return max(x, max(y, z));
+}
+FFXM_MIN16_F2 ffxMax3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 z)
+{
+ return max(x, max(y, z));
+}
+FFXM_MIN16_F3 ffxMax3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z)
+{
+ return max(x, max(y, z));
+}
+FFXM_MIN16_F4 ffxMax3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z)
+{
+ return max(x, max(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxMin3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z)
+{ // all overloads: minimum of the three inputs via two nested min() calls, per component
+ return min(x, min(y, z));
+}
+FFXM_MIN16_F2 ffxMin3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 z)
+{
+ return min(x, min(y, z));
+}
+FFXM_MIN16_F3 ffxMin3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z)
+{
+ return min(x, min(y, z));
+}
+FFXM_MIN16_F4 ffxMin3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z)
+{
+ return min(x, min(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxMed3Half(FFXM_MIN16_F x, FFXM_MIN16_F y, FFXM_MIN16_F z)
+{ // all overloads: median of three = max(smaller of x/y, min(larger of x/y, z)), per component
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_F2 ffxMed3Half(FFXM_MIN16_F2 x, FFXM_MIN16_F2 y, FFXM_MIN16_F2 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_F3 ffxMed3Half(FFXM_MIN16_F3 x, FFXM_MIN16_F3 y, FFXM_MIN16_F3 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_F4 ffxMed3Half(FFXM_MIN16_F4 x, FFXM_MIN16_F4 y, FFXM_MIN16_F4 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_I ffxMed3Half(FFXM_MIN16_I x, FFXM_MIN16_I y, FFXM_MIN16_I z)
+{ // integer overloads of the same median formula: max(smaller of x/y, min(larger of x/y, z)), per component
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_I2 ffxMed3Half(FFXM_MIN16_I2 x, FFXM_MIN16_I2 y, FFXM_MIN16_I2 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_I3 ffxMed3Half(FFXM_MIN16_I3 x, FFXM_MIN16_I3 y, FFXM_MIN16_I3 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+FFXM_MIN16_I4 ffxMed3Half(FFXM_MIN16_I4 x, FFXM_MIN16_I4 y, FFXM_MIN16_I4 z)
+{
+ return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxReciprocalHalf(FFXM_MIN16_F x)
+{ // all overloads forward to the rcp() intrinsic; no zero check is performed here
+ return rcp(x);
+}
+FFXM_MIN16_F2 ffxReciprocalHalf(FFXM_MIN16_F2 x)
+{
+ return rcp(x);
+}
+FFXM_MIN16_F3 ffxReciprocalHalf(FFXM_MIN16_F3 x)
+{
+ return rcp(x);
+}
+FFXM_MIN16_F4 ffxReciprocalHalf(FFXM_MIN16_F4 x)
+{
+ return rcp(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxReciprocalSquareRootHalf(FFXM_MIN16_F x)
+{ // all overloads forward to the rsqrt() intrinsic; no range check is performed here
+ return rsqrt(x);
+}
+FFXM_MIN16_F2 ffxReciprocalSquareRootHalf(FFXM_MIN16_F2 x)
+{
+ return rsqrt(x);
+}
+FFXM_MIN16_F3 ffxReciprocalSquareRootHalf(FFXM_MIN16_F3 x)
+{
+ return rsqrt(x);
+}
+FFXM_MIN16_F4 ffxReciprocalSquareRootHalf(FFXM_MIN16_F4 x)
+{
+ return rsqrt(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_F ffxSaturate(FFXM_MIN16_F x)
+{ // all overloads forward to the saturate() intrinsic
+ return saturate(x);
+}
+FFXM_MIN16_F2 ffxSaturate(FFXM_MIN16_F2 x)
+{
+ return saturate(x);
+}
+FFXM_MIN16_F3 ffxSaturate(FFXM_MIN16_F3 x)
+{
+ return saturate(x);
+}
+FFXM_MIN16_F4 ffxSaturate(FFXM_MIN16_F4 x)
+{
+ return saturate(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFXM_MIN16_U ffxBitShiftRightHalf(FFXM_MIN16_U a, FFXM_MIN16_U b)
+{ // all overloads: both operands are cast to the signed FFXM_MIN16_I type before >>, and the result is cast back to unsigned
+ return FFXM_MIN16_U(FFXM_MIN16_I(a) >> FFXM_MIN16_I(b));
+}
+FFXM_MIN16_U2 ffxBitShiftRightHalf(FFXM_MIN16_U2 a, FFXM_MIN16_U2 b)
+{
+ return FFXM_MIN16_U2(FFXM_MIN16_I2(a) >> FFXM_MIN16_I2(b));
+}
+FFXM_MIN16_U3 ffxBitShiftRightHalf(FFXM_MIN16_U3 a, FFXM_MIN16_U3 b)
+{
+ return FFXM_MIN16_U3(FFXM_MIN16_I3(a) >> FFXM_MIN16_I3(b));
+}
+FFXM_MIN16_U4 ffxBitShiftRightHalf(FFXM_MIN16_U4 a, FFXM_MIN16_U4 b)
+{
+ return FFXM_MIN16_U4(FFXM_MIN16_I4(a) >> FFXM_MIN16_I4(b));
+}
+#endif // FFXM_HALF
+
+//==============================================================================================================================
+// HLSL WAVE
+//==============================================================================================================================
+#if defined(FFXM_WAVE)
+// Each AWaveXor* returns v as held by the lane whose index is (this lane's index XOR x). 'x' must be a compile time literal.
+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) // NOTE(review): vector variants overload AWaveXorU1 instead of using U2/U3/U4 names like the float versions - confirm intent
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
+{
+ return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxBoolean AWaveIsFirstLane()
+{ // the functions in this group are one-to-one aliases for the wave intrinsic each one calls
+ return WaveIsFirstLane();
+}
+FfxUInt32 AWaveLaneIndex()
+{
+ return WaveGetLaneIndex();
+}
+FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x)
+{ // unlike the AWaveXor* helpers, x is used directly as the lane index
+ return WaveReadLaneAt(v, x);
+}
+FfxUInt32 AWavePrefixCountBits(FfxBoolean v)
+{
+ return WavePrefixCountBits(v);
+}
+FfxUInt32 AWaveActiveCountBits(FfxBoolean v)
+{
+ return WaveActiveCountBits(v);
+}
+FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v)
+{
+ return WaveReadLaneFirst(v);
+}
+FfxUInt32 WaveOr(FfxUInt32 a)
+{ // forwards to WaveActiveBitOr
+ return WaveActiveBitOr(a);
+}
+FfxFloat32 WaveMin(FfxFloat32 a)
+{ // forwards to WaveActiveMin
+ return WaveActiveMin(a);
+}
+FfxFloat32 WaveMax(FfxFloat32 a)
+{ // forwards to WaveActiveMax
+ return WaveActiveMax(a);
+}
+FfxUInt32 WaveLaneCount()
+{
+ return WaveGetLaneCount();
+}
+FfxBoolean WaveAllTrue(FfxBoolean v)
+{ // forwards to WaveActiveAllTrue
+ return WaveActiveAllTrue(v);
+}
+FfxFloat32 QuadReadX(FfxFloat32 v)
+{ // QuadReadX overloads forward to QuadReadAcrossX
+ return QuadReadAcrossX(v);
+}
+FfxFloat32x2 QuadReadX(FfxFloat32x2 v)
+{
+ return QuadReadAcrossX(v);
+}
+FfxFloat32 QuadReadY(FfxFloat32 v)
+{ // QuadReadY overloads forward to QuadReadAcrossY
+ return QuadReadAcrossY(v);
+}
+FfxFloat32x2 QuadReadY(FfxFloat32x2 v)
+{
+ return QuadReadAcrossY(v);
+}
+
+#if FFXM_HALF
+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
+{ // all four: pack v into 32-bit uint(s), read them from lane (own index XOR x), then unpack back to 16-bit components
+ return FFXM_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFXM_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
+}
+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
+{
+ return FFXM_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFXM_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
+}
+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
+{
+ return FFXM_UINT32_TO_UINT16X2(WaveReadLaneAt(FFXM_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
+}
+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
+{
+ return FFXM_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFXM_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
+}
+#endif // FFXM_HALF
+#endif // #if defined(FFXM_WAVE)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta
new file mode 100644
index 0000000..9f41d30
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_hlsl.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 3414d977001cdfc47846380911de9f05
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h
new file mode 100644
index 0000000..368cb2c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h
@@ -0,0 +1,50 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+ // Adds the scalar b (broadcast to three components) to a. d is passed by value, so only the return value is observable.
+ return a + ffxBroadcast3(b);
+}
+
+FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+ // Returns a copy of a. d is passed by value, so only the return value is observable.
+ return a;
+}
+
+FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+{
+ // Component-wise product of a and b. d is passed by value, so only the return value is observable.
+ return a * b;
+}
+
+FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+ // Scales a by the scalar b (broadcast to three components). d is passed by value, so only the return value is observable.
+ return a * ffxBroadcast3(b);
+}
+
+FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+ // Component-wise rcp() of a. d is passed by value, so only the return value is observable.
+ return rcp(a);
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta
new file mode 100644
index 0000000..6858a54
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_core_portability.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: c599687271fc4f444a2858745fc7f0c5
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl
new file mode 100644
index 0000000..8c23aab
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl
@@ -0,0 +1,100 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
+#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1
+#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2
+#else
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2
+#endif
+#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3
+#define FSR2_BIND_SRV_LOCK_STATUS 4
+#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5
+#define FSR2_BIND_SRV_LANCZOS_LUT 6
+#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7
+#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 8
+#define FSR2_BIND_SRV_AUTO_EXPOSURE 9
+#define FSR2_BIND_SRV_LUMA_HISTORY 10
+#define FSR2_BIND_SRV_TEMPORAL_REACTIVE 11
+#define FSR2_BIND_SRV_NEW_LOCKS 12
+
+#define FSR2_BIND_CB_FSR2 0
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_sample.h"
+#include "fsr2/ffxm_fsr2_upsample.h"
+#include "fsr2/ffxm_fsr2_postprocess_lock_status.h"
+#include "fsr2/ffxm_fsr2_reproject.h"
+#include "fsr2/ffxm_fsr2_accumulate.h"
+
+#if defined(SHADER_API_PSSL)
+#pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR)
+#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR)
+#pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR)
+#endif
+
+struct AccumulateOutputsFS
+{
+ // Render-target layout depends on the quality preset; fLockStatus is bound to target 1 or target 2 accordingly.
+#if !FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+ FfxFloat32x4 fColorAndWeight : SV_TARGET0;
+ FfxFloat32x2 fLockStatus : SV_TARGET1;
+ FfxFloat32x4 fLumaHistory : SV_TARGET2;
+#else // FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+ FfxFloat32x4 fUpscaledColor : SV_TARGET0;
+ FfxFloat32 fTemporalReactive : SV_TARGET1;
+ FfxFloat32x2 fLockStatus : SV_TARGET2;
+#endif
+ // Both presets append the same colour output on target 3 when the
+ // sharpening option is 0, so it is declared once after the preset split.
+#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0
+ FfxFloat32x4 fColor : SV_TARGET3;
+#endif
+};
+
+AccumulateOutputsFS main(float4 SvPosition : SV_POSITION)
+{
+ // Run Accumulate() for the pixel under this fragment, then copy its results into the render-target struct.
+ AccumulateOutputs accum = Accumulate(uint2(SvPosition.xy));
+ AccumulateOutputsFS targets = (AccumulateOutputsFS)0;
+ targets.fLockStatus = accum.fLockStatus;
+#if FFXM_SHADER_QUALITY_BALANCED_OR_PERFORMANCE
+ targets.fUpscaledColor = FfxFloat32x4(accum.fUpscaledColor, 1.0f);
+ targets.fTemporalReactive = accum.fTemporalReactive;
+#else
+ targets.fColorAndWeight = accum.fColorAndWeight;
+ targets.fLumaHistory = accum.fLumaHistory;
+#endif
+#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0
+ targets.fColor = FfxFloat32x4(accum.fColor, 1.0f);
+#endif
+ return targets;
+}
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta
similarity index 75%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta
index 854e60d..7d82e1e 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl.meta
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_accumulate_pass_fs.hlsl.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: d980c2a95e08a894d96b558154687e24
+guid: 63de8005a89afab4298bbc1e2edf2a01
ShaderIncludeImporter:
externalObjects: {}
userData:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl
new file mode 100644
index 0000000..64b5b29
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl
@@ -0,0 +1,79 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0
+#define FSR2_BIND_SRV_INPUT_COLOR 1
+
+#define FSR2_BIND_CB_FSR2 0
+#define FSR2_BIND_CB_REACTIVE 1
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+
+#if defined(SHADER_API_PSSL)
+#pragma PSSL_target_output_format(default FMT_FP16_ABGR)
+#endif
+
+struct GenReactiveMaskOutputs // Return type of this file's main(): the render-target outputs of the reactive-mask generation pass.
+{
+ FfxFloat32 fReactiveMask : SV_TARGET0; // Single value bound to render target 0; main() stores the computed reactive value here.
+};
+
+GenReactiveMaskOutputs main(float4 SvPosition : SV_POSITION) // Entry point: derives a reactive value from the difference between the opaque-only color and the input color at this pixel.
+{
+ uint2 uPixelCoord = uint2(SvPosition.xy); // Integer pixel coordinate: the xy of SV_POSITION converted to uint2.
+
+ float3 ColorPreAlpha = LoadOpaqueOnly( FFXM_MIN16_I2(uPixelCoord) ).rgb; // NOTE(review): coordinate is converted with FFXM_MIN16_I2 here but passed unconverted to LoadInputColor below - confirm this is intended.
+ float3 ColorPostAlpha = LoadInputColor(uPixelCoord).rgb; // Only the rgb channels of either load are used.
+
+ if (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) // Flag-controlled: run both colors through Tonemap() before comparing.
+ {
+ ColorPreAlpha = Tonemap(ColorPreAlpha);
+ ColorPostAlpha = Tonemap(ColorPostAlpha);
+ }
+
+ if (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) // Tested independently of the flag above, so both transforms run in sequence when both flags are set.
+ {
+ ColorPreAlpha = InverseTonemap(ColorPreAlpha);
+ ColorPostAlpha = InverseTonemap(ColorPostAlpha);
+ }
+
+ float out_reactive_value = 0.f;
+ float3 delta = abs(ColorPostAlpha - ColorPreAlpha); // Per-channel absolute difference between the two colors.
+
+ out_reactive_value = (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); // Largest channel difference when USE_COMPONENTS_MAX is set, otherwise the vector length of the difference.
+ out_reactive_value *= GenReactiveScale(); // Scale by the value returned from GenReactiveScale().
+
+ out_reactive_value = (GenReactiveFlags() & FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < GenReactiveThreshold() ? 0 : GenReactiveBinaryValue()) : out_reactive_value; // With APPLY_THRESHOLD: 0 below GenReactiveThreshold(), GenReactiveBinaryValue() otherwise; without the flag the scaled value is kept.
+
+ GenReactiveMaskOutputs results = (GenReactiveMaskOutputs)0; // Zero-initialize the output struct before filling it in.
+ results.fReactiveMask = out_reactive_value;
+
+ return results;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta
similarity index 75%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta
index b81c841..04141bb 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl.meta
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_autogen_reactive_pass_fs.hlsl.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: 5cde6f90c795fb841a38f37495375e6e
+guid: 59f96f119bfba924198951bea4194ecd
ShaderIncludeImporter:
externalObjects: {}
userData:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl
new file mode 100644
index 0000000..314e189
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl
@@ -0,0 +1,64 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_INPUT_COLOR 0
+#define FSR2_BIND_SRV_AUTO_EXPOSURE 1
+
+#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1
+#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2
+#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3
+#define FSR2_BIND_UAV_AUTO_EXPOSURE 4
+
+#define FSR2_BIND_CB_FSR2 0
+#define FSR2_BIND_CB_SPD 1
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_compute_luminance_pyramid.h"
+
+#ifndef FFXM_FSR2_THREAD_GROUP_WIDTH
+#define FFXM_FSR2_THREAD_GROUP_WIDTH 256
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT
+#define FFXM_FSR2_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFXM_FSR2_THREAD_GROUP_DEPTH
+#define FFXM_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFXM_FSR2_NUM_THREADS
+#define FFXM_FSR2_NUM_THREADS [numthreads(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT, FFXM_FSR2_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFXM_FSR2_NUM_THREADS
+
+FFXM_PREFER_WAVE64 // Macro defined outside this file; presumably a wave-size hint for the compiler - TODO confirm against the included headers.
+FFXM_FSR2_NUM_THREADS // Unless defined earlier, expands to the [numthreads(...)] attribute built from the thread-group defaults above (256 x 1 x 1).
+FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT // Macro defined outside this file; presumably embeds a root signature covering two constant buffers - TODO confirm.
+void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) // Compute entry point of the luminance pyramid pass.
+{
+ ComputeAutoExposure(WorkGroupId, LocalThreadIndex); // All work is delegated; this wrapper only forwards the group id and the flattened in-group thread index.
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta
similarity index 75%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta
index 790c78a..a066167 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl.meta
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_compute_luminance_pyramid_pass.hlsl.meta
@@ -1,5 +1,5 @@
fileFormatVersion: 2
-guid: 503a58dfbaf241c4cbabb2b264d66f96
+guid: 4670a9ebaa60c3143be978efc227163b
ShaderIncludeImporter:
externalObjects: {}
userData:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl
new file mode 100644
index 0000000..dd80fa0
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl
@@ -0,0 +1,67 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0
+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1
+#define FSR2_BIND_SRV_DILATED_DEPTH 2
+#define FSR2_BIND_SRV_REACTIVE_MASK 3
+#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4
+#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 6
+#define FSR2_BIND_SRV_INPUT_COLOR 7
+#define FSR2_BIND_SRV_INPUT_DEPTH 8
+#define FSR2_BIND_SRV_INPUT_EXPOSURE 9
+
+#define FSR2_BIND_CB_FSR2 0
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_sample.h"
+#include "fsr2/ffxm_fsr2_depth_clip.h"
+
+#if defined(SHADER_API_PSSL)
+#pragma PSSL_target_output_format(target 0 FMT_FP16_ABGR)
+#pragma PSSL_target_output_format(target 1 FMT_FP16_ABGR)
+#endif
+
+struct DepthClipOutputsFS // Return type of this file's main(): the render-target outputs of the depth clip pass.
+{
+ FfxFloat32x4 fTonemapped : SV_TARGET0; // Bound to render target 0; copied from the DepthClip() result field of the same name.
+ FfxFloat32x2 fDilatedReactiveMasks : SV_TARGET1; // Bound to render target 1; copied from the DepthClip() result field of the same name.
+};
+
+DepthClipOutputsFS main(float4 SvPosition : SV_POSITION) // Entry point: runs DepthClip() for this pixel and routes its results to the render targets.
+{
+ uint2 uPixelCoord = uint2(SvPosition.xy); // Integer pixel coordinate: the xy of SV_POSITION converted to uint2.
+ DepthClipOutputs result = DepthClip(uPixelCoord); // The pass logic itself lives in the included ffxm_fsr2_depth_clip.h.
+ DepthClipOutputsFS output = (DepthClipOutputsFS)0; // Zero-initialize before copying fields across.
+ output.fTonemapped = result.fTonemapped;
+ output.fDilatedReactiveMasks = result.fDilatedReactiveMasks;
+ return output;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta
new file mode 100644
index 0000000..c6497c2
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_depth_clip_pass_fs.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 2825c941cb2d43145b426c42ec6e7869
+ShaderIncludeImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl
new file mode 100644
index 0000000..2da63f2
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl
@@ -0,0 +1,63 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0
+
+#define FSR2_BIND_UAV_NEW_LOCKS 1
+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2
+
+#define FSR2_BIND_CB_FSR2 0
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_sample.h"
+#include "fsr2/ffxm_fsr2_lock.h"
+
+#ifndef FFXM_FSR2_THREAD_GROUP_WIDTH
+#define FFXM_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT
+#define FFXM_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFXM_FSR2_THREAD_GROUP_DEPTH
+#define FFXM_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFXM_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFXM_FSR2_NUM_THREADS
+#define FFXM_FSR2_NUM_THREADS [numthreads(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT, FFXM_FSR2_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFXM_FSR2_NUM_THREADS
+
+FFXM_PREFER_WAVE64 // Macro defined outside this file; presumably a wave-size hint for the compiler - TODO confirm against the included headers.
+FFXM_FSR2_NUM_THREADS // Unless defined earlier, expands to the [numthreads(...)] attribute built from the thread-group defaults above (8 x 8 x 1).
+FFXM_FSR2_EMBED_ROOTSIG_CONTENT // Macro defined outside this file; presumably embeds the root signature for this pass - TODO confirm.
+void main(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) // Compute entry point of the lock pass.
+{
+ uint2 uDispatchThreadId = uGroupId * uint2(FFXM_FSR2_THREAD_GROUP_WIDTH, FFXM_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; // Global 2D thread id rebuilt as group id * group size + in-group id, using the same width/height macros as the numthreads default.
+
+ ComputeLock(uDispatchThreadId); // The pass logic itself lives in the included ffxm_fsr2_lock.h.
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta
new file mode 100644
index 0000000..6867472
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_lock_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 78914a065e6727e4d8255fb76b44d5da
+ShaderIncludeImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl
new file mode 100644
index 0000000..b5585ec
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl
@@ -0,0 +1,52 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_INPUT_EXPOSURE 0
+#define FSR2_BIND_SRV_RCAS_INPUT 1
+
+#define FSR2_BIND_CB_FSR2 0
+#define FSR2_BIND_CB_RCAS 1
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_rcas.h"
+
+struct RCASOutputsFS // Return type of this file's main(): the render-target output of the RCAS pass.
+{
+ FfxFloat32x4 fUpscaledColor : SV_TARGET0; // Bound to render target 0; main() fills the last component with 1.0.
+};
+
+RCASOutputsFS main(float4 SvPosition : SV_POSITION) // Entry point: runs RCAS() for this pixel and writes the result to render target 0.
+{
+ uint2 uPixelCoord = uint2(SvPosition.xy); // Integer pixel coordinate: the xy of SV_POSITION converted to uint2.
+ RCASOutputs result = RCAS(uPixelCoord); // The pass logic itself lives in the included ffxm_fsr2_rcas.h.
+ RCASOutputsFS output = (RCASOutputsFS)0; // Zero-initialize before filling in the output.
+ output.fUpscaledColor = FfxFloat32x4(result.fUpscaledColor, 1.0f); // Widen the RCAS color to four components, with 1.0 as the final one.
+ return output;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta
new file mode 100644
index 0000000..ce5e743
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_rcas_pass_fs.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 16a306235fdc01044a347f0cb0a9b147
+ShaderIncludeImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl
new file mode 100644
index 0000000..457665b
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl
@@ -0,0 +1,68 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0
+#define FSR2_BIND_SRV_INPUT_DEPTH 1
+#define FSR2_BIND_SRV_INPUT_COLOR 2
+#define FSR2_BIND_SRV_INPUT_EXPOSURE 3
+
+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 3
+
+#define FSR2_BIND_CB_FSR2 0
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#include "fsr2/ffxm_fsr2_callbacks_hlsl.h"
+#include "fsr2/ffxm_fsr2_common.h"
+#include "fsr2/ffxm_fsr2_sample.h"
+#include "fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
+
+#if defined(SHADER_API_PSSL)
+#pragma PSSL_target_output_format(target 0 FMT_32_R)
+#pragma PSSL_target_output_format(target 1 FMT_32_R)
+#pragma PSSL_target_output_format(target 2 FMT_FP16_ABGR)
+#pragma PSSL_target_output_format(target 3 FMT_32_R)
+#endif
+
+struct ReconstructPrevDepthOutputsFS // Return type of this file's main(): the render-target outputs of the reconstruct-previous-depth pass.
+{
+ FfxFloat32 fDepth : SV_TARGET0; // Bound to render target 0.
+ FfxFloat32 fLuma : SV_TARGET1; // Bound to render target 1.
+ FfxFloat32x2 fMotionVector : SV_TARGET2; // Bound to render target 2.
+}; // NOTE(review): the PSSL pragmas above configure targets 0-3, but only targets 0-2 are declared here - confirm the target 3 pragma is intentional.
+
+
+ReconstructPrevDepthOutputsFS main(float4 SvPosition : SV_POSITION) // Entry point: runs ReconstructAndDilate() for this pixel and routes its results to the render targets.
+{
+ uint2 uPixelCoord = uint2(SvPosition.xy); // Integer pixel coordinate: the xy of SV_POSITION converted to uint2.
+ ReconstructPrevDepthOutputs result = ReconstructAndDilate(uPixelCoord); // The pass logic itself lives in the included ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.
+ ReconstructPrevDepthOutputsFS output = (ReconstructPrevDepthOutputsFS)0; // Zero-initialize before copying fields across.
+ output.fDepth = result.fDepth;
+ output.fLuma = result.fLuma;
+ output.fMotionVector = result.fMotionVector;
+ return output;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta
new file mode 100644
index 0000000..7888885
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_reconstruct_previous_depth_pass_fs.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: bdcb34025b67be743a32494703775cc1
+ShaderIncludeImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl
new file mode 100644
index 0000000..71bdcab
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl
@@ -0,0 +1,44 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Global mandatory defines
+#if !defined(FFXM_GPU)
+#define FFXM_GPU 1
+#endif
+#if !defined(FFXM_HLSL)
+#define FFXM_HLSL 1
+#endif
+
+#define FSR2_BIND_CB_FSR2 0
+
+struct VertexOut // Return type of VertMain(): the only vertex output is the clip-space position.
+{
+ float4 position : SV_POSITION; // Clip-space position written by VertMain().
+};
+
+VertexOut VertMain(uint uVertexId : SV_VERTEXID) // Vertex entry point: builds the position from the vertex id alone, with no vertex buffer input.
+{
+ VertexOut output;
+ float2 uv = float2(uVertexId & 1, uVertexId >> 1) * 2.0; // Ids 0, 1, 2 give (0,0), (2,0), (0,2).
+ output.position = float4(uv * 2.0 - 1.0, 0.0, 1.0); // Those map to (-1,-1), (3,-1), (-1,3): one oversized triangle covering the viewport - assumes a 3-vertex draw, TODO confirm at the call site.
+
+ return output;
+}
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta
new file mode 100644
index 0000000..5d65f24
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/ffxm_fsr2_vs.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: a9dfeac9728e7404f97655aac002e5eb
+ShaderIncludeImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta
new file mode 100644
index 0000000..431aa13
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 825bf9eee2b16c7499e5cfb3c9721df0
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h
new file mode 100644
index 0000000..ad5c865
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h
@@ -0,0 +1,1251 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// @defgroup FfxGPUFsr1 FidelityFX FSR1
+/// FidelityFX Super Resolution 1 GPU documentation
+///
+/// @ingroup FfxGPUEffects
+
+/// Fill the four EASU constant vectors from the input and output resolutions (works on CPU or GPU).
+///
+/// Every non-zero entry is a 32-bit float stored as its raw bit pattern via ffxAsUInt32.
+///
+/// @param [out] con0 xy: input viewport / output size ratio; zw: 0.5 * ratio - 0.5.
+/// @param [out] con1 xy: reciprocal of the input resource size; zw: (+1, -1) scaled by that reciprocal.
+/// @param [out] con2 xy: (-1, +2) scaled by the reciprocal input size; zw: (+1, +2) scaled likewise.
+/// @param [out] con3 xy: (0, +4) scaled by the reciprocal input size; zw: zero.
+/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension.
+/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension.
+/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension.
+/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension.
+/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension.
+/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension.
+///
+/// @ingroup FfxGPUFsr1
+FFXM_STATIC void ffxFsrPopulateEasuConstants(
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsY)
+{
+    // Reciprocals that are reused by several constants below.
+    FfxFloat32 rcpOutputX = ffxReciprocal(outputSizeInPixelsX);
+    FfxFloat32 rcpOutputY = ffxReciprocal(outputSizeInPixelsY);
+    FfxFloat32 rcpInputX = ffxReciprocal(inputSizeInPixelsX);
+    FfxFloat32 rcpInputY = ffxReciprocal(inputSizeInPixelsY);
+
+    // Output integer position to a pixel position in viewport.
+    con0[0] = ffxAsUInt32(inputViewportInPixelsX * rcpOutputX);
+    con0[1] = ffxAsUInt32(inputViewportInPixelsY * rcpOutputY);
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * rcpOutputX - FfxFloat32(0.5));
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * rcpOutputY - FfxFloat32(0.5));
+
+    // Viewport pixel position to normalized image space.
+    // This is used to get upper-left of 'F' tap.
+    con1[0] = ffxAsUInt32(rcpInputX);
+    con1[1] = ffxAsUInt32(rcpInputY);
+
+    // Centers of gather4, first offset from upper-left of 'F'.
+    //      +---+---+
+    //      |   |   |
+    //      +--(0)--+
+    //      | b | c |
+    //  +---F---+---+---+
+    //  | e | f | g | h |
+    //  +--(1)--+--(2)--+
+    //  | i | j | k | l |
+    //  +---+---+---+---+
+    //      | n | o |
+    //      +--(3)--+
+    //      |   |   |
+    //      +---+---+
+    con1[2] = ffxAsUInt32(FfxFloat32(1.0) * rcpInputX);
+    con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * rcpInputY);
+
+    // The remaining offsets are relative to (0) instead of 'F'.
+    con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * rcpInputX);
+    con2[1] = ffxAsUInt32(FfxFloat32(2.0) * rcpInputY);
+    con2[2] = ffxAsUInt32(FfxFloat32(1.0) * rcpInputX);
+    con2[3] = ffxAsUInt32(FfxFloat32(2.0) * rcpInputY);
+    con3[0] = ffxAsUInt32(FfxFloat32(0.0) * rcpInputX);
+    con3[1] = ffxAsUInt32(FfxFloat32(4.0) * rcpInputY);
+
+    // The last two lanes are unused.
+    con3[3] = 0;
+    con3[2] = 0;
+}
+
+/// Fill the four EASU constant vectors for an input image that sits at an offset inside its resource
+/// (works on CPU or GPU).
+///
+/// Delegates to ffxFsrPopulateEasuConstants and then rewrites con0.zw so that they include the input offset.
+///
+/// @param [out] con0
+/// @param [out] con1
+/// @param [out] con2
+/// @param [out] con3
+/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension.
+/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension.
+/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension.
+/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension.
+/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension.
+/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension.
+/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
+/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
+///
+/// @ingroup FfxGPUFsr1
+FFXM_STATIC void ffxFsrPopulateEasuConstantsOffset(
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 outputSizeInPixelsY,
+    FFXM_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX,
+    FFXM_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY)
+{
+    // Start from the constants of the zero-offset case.
+    ffxFsrPopulateEasuConstants(
+        con0, con1, con2, con3,
+        inputViewportInPixelsX, inputViewportInPixelsY,
+        inputSizeInPixelsX, inputSizeInPixelsY,
+        outputSizeInPixelsX, outputSizeInPixelsY);
+
+    // Same bias as the base function computes for con0.zw ...
+    FfxFloat32 biasX = FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5);
+    FfxFloat32 biasY = FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5);
+
+    // ... shifted by the input offset, overriding what the base function stored.
+    con0[2] = ffxAsUInt32(biasX + inputOffsetInPixelsX);
+    con0[3] = ffxAsUInt32(biasY + inputOffsetInPixelsY);
+}
+
+#if defined(FFXM_GPU) && defined(FFXM_FSR_EASU_FLOAT)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p);
+
+// Accumulate a single filter tap (scalar 32-bit path).
+// The tap offset is rotated by the gradient direction, scaled anisotropically, turned into a squared distance
+// and weighted with a polynomial kernel; the weighted color and the weight are added to the running sums.
+void fsrEasuTapFloat(
+    FFXM_PARAMETER_INOUT FfxFloat32x3 accumulatedColor,  // Accumulated color, with negative lobe.
+    FFXM_PARAMETER_INOUT FfxFloat32 accumulatedWeight,   // Accumulated weight.
+    FFXM_PARAMETER_IN FfxFloat32x2 pixelOffset,          // Pixel offset from resolve position to tap.
+    FFXM_PARAMETER_IN FfxFloat32x2 gradientDirection,    // Gradient direction.
+    FFXM_PARAMETER_IN FfxFloat32x2 length,               // Length.
+    FFXM_PARAMETER_IN FfxFloat32 negativeLobeStrength,   // Negative lobe strength.
+    FFXM_PARAMETER_IN FfxFloat32 clippingPoint,          // Clipping point.
+    FFXM_PARAMETER_IN FfxFloat32x3 color)                // Tap color.
+{
+    // Rotate the offset by the direction, then apply the anisotropic scale.
+    FfxFloat32x2 kernelPos;
+    kernelPos.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y);
+    kernelPos.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x);
+    kernelPos *= length;
+
+    // distance^2, limited to the window because at corners 2 taps can easily be outside.
+    FfxFloat32 dist2 = ffxMin(kernelPos.x * kernelPos.x + kernelPos.y * kernelPos.y, clippingPoint);
+
+    // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+    //  |_______________________________________|   |_______________|
+    //                   base                             window
+    // The general form of the 'base' is,
+    //  (a*(b*x^2-1)^2-(a-1))
+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+    FfxFloat32 baseTerm = FfxFloat32(2.0 / 5.0) * dist2 + FfxFloat32(-1.0);
+    FfxFloat32 windowTerm = negativeLobeStrength * dist2 + FfxFloat32(-1.0);
+    baseTerm = FfxFloat32(25.0 / 16.0) * (baseTerm * baseTerm) + FfxFloat32(-(25.0 / 16.0 - 1.0));
+    FfxFloat32 tapWeight = baseTerm * (windowTerm * windowTerm);
+
+    // Contribute to the weighted average.
+    accumulatedColor += color * tapWeight;
+    accumulatedWeight += tapWeight;
+}
+
+// Accumulate the bilinearly weighted gradient direction and edge length for one of the four
+// nearest texels, given the luma of that texel (lC) and of its '+' neighbourhood (lA, lB, lD, lE).
+void fsrEasuSetFloat(
+    FFXM_PARAMETER_INOUT FfxFloat32x2 direction,
+    FFXM_PARAMETER_INOUT FfxFloat32 length,
+    FFXM_PARAMETER_IN FfxFloat32x2 pp,
+    FFXM_PARAMETER_IN FfxBoolean biS,
+    FFXM_PARAMETER_IN FfxBoolean biT,
+    FFXM_PARAMETER_IN FfxBoolean biU,
+    FFXM_PARAMETER_IN FfxBoolean biV,
+    FFXM_PARAMETER_IN FfxFloat32 lA,
+    FFXM_PARAMETER_IN FfxFloat32 lB,
+    FFXM_PARAMETER_IN FfxFloat32 lC,
+    FFXM_PARAMETER_IN FfxFloat32 lD,
+    FFXM_PARAMETER_IN FfxFloat32 lE)
+{
+    // Bilinear weight of the selected corner; the flags are compile-time immediates at the call sites,
+    // so the branches factor out. A later flag overrides an earlier one.
+    //  s t
+    //  u v
+    FfxFloat32 invX = FfxFloat32(1.0) - pp.x;
+    FfxFloat32 invY = FfxFloat32(1.0) - pp.y;
+    FfxFloat32 weight = FfxFloat32(0.0);
+    if (biS)
+    {
+        weight = invX * invY;
+    }
+    if (biT)
+    {
+        weight = pp.x * invY;
+    }
+    if (biU)
+    {
+        weight = invX * pp.y;
+    }
+    if (biV)
+    {
+        weight = pp.x * pp.y;
+    }
+
+    // Direction is the '+' diff.
+    //    a
+    //  b c d
+    //    e
+    // Then takes magnitude from abs average of both sides of 'c'.
+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+
+    // Horizontal term.
+    FfxFloat32 gradX = lD - lB;
+    FfxFloat32 rcpStepX = ffxApproximateReciprocal(max(abs(lD - lC), abs(lC - lB)));
+    FfxFloat32 edgeX = ffxSaturate(abs(gradX) * rcpStepX);
+    direction.x += gradX * weight;
+    length += (edgeX * edgeX) * weight;
+
+    // Vertical term.
+    FfxFloat32 gradY = lE - lA;
+    FfxFloat32 rcpStepY = ffxApproximateReciprocal(max(abs(lE - lC), abs(lC - lA)));
+    FfxFloat32 edgeY = ffxSaturate(abs(gradY) * rcpStepY);
+    direction.y += gradY * weight;
+    length += (edgeY * edgeY) * weight;
+}
+
+/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations.
+///
+/// The input image is read through the FsrEasuRF / FsrEasuGF / FsrEasuBF callbacks, which the
+/// including shader must implement.
+///
+/// @param [out] pix The computed color of a pixel.
+/// @param [in] ip Integer pixel position within the output.
+/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants.
+/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants.
+///
+/// @ingroup FfxGPUFsr1
+void ffxFsrEasuFloat(
+ FFXM_PARAMETER_OUT FfxFloat32x3 pix,
+ FFXM_PARAMETER_IN FfxUInt32x2 ip,
+ FFXM_PARAMETER_IN FfxUInt32x4 con0,
+ FFXM_PARAMETER_IN FfxUInt32x4 con1,
+ FFXM_PARAMETER_IN FfxUInt32x4 con2,
+ FFXM_PARAMETER_IN FfxUInt32x4 con3)
+{
+ // Get position of 'f'.
+ // After this, fp is the integer part of the input-space position and pp its fractional part.
+ FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+ FfxFloat32x2 fp = floor(pp);
+ pp -= fp;
+
+ // 12-tap kernel.
+ //    b c
+ //  e f g h
+ //  i j k l
+ //    n o
+ // Gather 4 ordering.
+ //  a b
+ //  r g
+ // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+ //    a b    <- unused (z)
+ //    r g
+ //  a b a b
+ //  r g r g
+ //    a b
+ //    r g    <- unused (z)
+ // Allowing dead-code removal to remove the 'z's.
+ FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+
+ // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+ FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy);
+ FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw);
+ FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy);
+ // Fetch each color channel at the four positions; variable names list the taps held in .xyzw ('z' = unused).
+ FfxFloat32x4 bczzR = FsrEasuRF(p0);
+ FfxFloat32x4 bczzG = FsrEasuGF(p0);
+ FfxFloat32x4 bczzB = FsrEasuBF(p0);
+ FfxFloat32x4 ijfeR = FsrEasuRF(p1);
+ FfxFloat32x4 ijfeG = FsrEasuGF(p1);
+ FfxFloat32x4 ijfeB = FsrEasuBF(p1);
+ FfxFloat32x4 klhgR = FsrEasuRF(p2);
+ FfxFloat32x4 klhgG = FsrEasuGF(p2);
+ FfxFloat32x4 klhgB = FsrEasuBF(p2);
+ FfxFloat32x4 zzonR = FsrEasuRF(p3);
+ FfxFloat32x4 zzonG = FsrEasuGF(p3);
+ FfxFloat32x4 zzonB = FsrEasuBF(p3);
+
+ // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+ FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG);
+ FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG);
+ FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG);
+ FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG);
+
+ // Rename.
+ FfxFloat32 bL = bczzL.x;
+ FfxFloat32 cL = bczzL.y;
+ FfxFloat32 iL = ijfeL.x;
+ FfxFloat32 jL = ijfeL.y;
+ FfxFloat32 fL = ijfeL.z;
+ FfxFloat32 eL = ijfeL.w;
+ FfxFloat32 kL = klhgL.x;
+ FfxFloat32 lL = klhgL.y;
+ FfxFloat32 hL = klhgL.z;
+ FfxFloat32 gL = klhgL.w;
+ FfxFloat32 oL = zzonL.z;
+ FfxFloat32 nL = zzonL.w;
+
+ // Accumulate for bilinear interpolation.
+ // One call per texel of the central 2x2 (f, g, j, k), each with its '+' shaped luma neighbourhood.
+ FfxFloat32x2 dir = ffxBroadcast2(0.0);
+ FfxFloat32 len = FfxFloat32(0.0);
+ fsrEasuSetFloat(dir, len, pp, FFXM_TRUE, FFXM_FALSE, FFXM_FALSE, FFXM_FALSE, bL, eL, fL, gL, jL);
+ fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_TRUE, FFXM_FALSE, FFXM_FALSE, cL, fL, gL, hL, kL);
+ fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_FALSE, FFXM_TRUE, FFXM_FALSE, fL, iL, jL, kL, nL);
+ fsrEasuSetFloat(dir, len, pp, FFXM_FALSE, FFXM_FALSE, FFXM_FALSE, FFXM_TRUE, gL, jL, kL, lL, oL);
+
+ // Normalize with approximation, and cleanup close to zero.
+ // When the squared length is below 1/32768 the direction is replaced by (1, dir.y) and left unscaled.
+ FfxFloat32x2 dir2 = dir * dir;
+ FfxFloat32 dirR = dir2.x + dir2.y;
+ FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0);
+ dirR = ffxApproximateReciprocalSquareRoot(dirR);
+ dirR = zro ? FfxFloat32(1.0) : dirR;
+ dir.x = zro ? FfxFloat32(1.0) : dir.x;
+ dir *= ffxBroadcast2(dirR);
+
+ // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+ len = len * FfxFloat32(0.5);
+ len *= len;
+
+ // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+ FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y)));
+
+ // Anisotropic length after rotation,
+ // x := 1.0 lerp to 'stretch' on edges
+ // y := 1.0 lerp to 2x on edges
+ FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len);
+
+ // Based on the amount of 'edge',
+ // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+ FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len;
+
+ // Set distance^2 clipping point to the end of the adjustable window.
+ FfxFloat32 clp = ffxApproximateReciprocal(lob);
+
+ // Accumulation mixed with min/max of 4 nearest.
+ //    b c
+ //  e f g h
+ //  i j k l
+ //    n o
+ // min4 / max4 are the per-channel min and max over taps f, g, j and k.
+ FfxFloat32x3 min4 =
+ ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)),
+ FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+ FfxFloat32x3 max4 =
+ max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+
+ // Accumulation.
+ // Each call passes the tap's offset from 'f' minus the fractional position, and the tap's RGB.
+ FfxFloat32x3 aC = ffxBroadcast3(0.0);
+ FfxFloat32 aW = FfxFloat32(0.0);
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o
+ fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n
+
+ // Normalize and dering.
+ // The weighted average is clamped to the [min4, max4] range of the four nearest taps.
+ pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));
+}
+#endif // #if defined(FFXM_GPU) && defined(FFXM_FSR_EASU_FLOAT)
+
+#if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FFXM_FSR_EASU_HALF)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p);
+
+// Accumulate two filter taps at once (packed 16-bit path): the .x and .y lanes of each two-component
+// argument belong to two different taps that are weighted in parallel.
+void FsrEasuTapH(
+    FFXM_PARAMETER_INOUT FfxFloat16x2 aCR,
+    FFXM_PARAMETER_INOUT FfxFloat16x2 aCG,
+    FFXM_PARAMETER_INOUT FfxFloat16x2 aCB,
+    FFXM_PARAMETER_INOUT FfxFloat16x2 aW,
+    FFXM_PARAMETER_IN FfxFloat16x2 offX,
+    FFXM_PARAMETER_IN FfxFloat16x2 offY,
+    FFXM_PARAMETER_IN FfxFloat16x2 dir,
+    FFXM_PARAMETER_IN FfxFloat16x2 len,
+    FFXM_PARAMETER_IN FfxFloat16 lob,
+    FFXM_PARAMETER_IN FfxFloat16 clp,
+    FFXM_PARAMETER_IN FfxFloat16x2 cR,
+    FFXM_PARAMETER_IN FfxFloat16x2 cG,
+    FFXM_PARAMETER_IN FfxFloat16x2 cB)
+{
+    // Rotate both offsets by the direction, then apply the anisotropic scale per axis.
+    FfxFloat16x2 rotX = offX * dir.xx + offY * dir.yy;
+    FfxFloat16x2 rotY = offX * (-dir.yy) + offY * dir.xx;
+    rotX *= len.x;
+    rotY *= len.y;
+
+    // Squared distance of each tap, limited to the clipping point.
+    FfxFloat16x2 dist2 = min(rotX * rotX + rotY * rotY, FFXM_BROADCAST_FLOAT16X2(clp));
+
+    // Polynomial kernel: (25/16 * (2/5 * d2 - 1)^2 - (25/16 - 1)) * (lob * d2 - 1)^2.
+    FfxFloat16x2 baseTerm = FFXM_BROADCAST_FLOAT16X2(2.0 / 5.0) * dist2 + FFXM_BROADCAST_FLOAT16X2(-1.0);
+    FfxFloat16x2 windowTerm = FFXM_BROADCAST_FLOAT16X2(lob) * dist2 + FFXM_BROADCAST_FLOAT16X2(-1.0);
+    baseTerm = FFXM_BROADCAST_FLOAT16X2(25.0 / 16.0) * (baseTerm * baseTerm) + FFXM_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0));
+    FfxFloat16x2 tapWeights = baseTerm * (windowTerm * windowTerm);
+
+    // Add the weighted colors and the weights to the per-lane running sums.
+    aCR += cR * tapWeights;
+    aCG += cG * tapWeights;
+    aCB += cB * tapWeights;
+    aW += tapWeights;
+}
+
+// Accumulate the weighted gradient direction and edge length for two texels at once (packed 16-bit path).
+// biST selects the weights built from (1 - pp.y), biUV the weights built from pp.y; the two lanes of
+// every luma argument belong to the two texels processed in parallel.
+void FsrEasuSetH(
+    FFXM_PARAMETER_INOUT FfxFloat16x2 dirPX,
+    FFXM_PARAMETER_INOUT FfxFloat16x2 dirPY,
+    FFXM_PARAMETER_INOUT FfxFloat16x2 lenP,
+    FFXM_PARAMETER_IN FfxFloat16x2 pp,
+    FFXM_PARAMETER_IN FfxBoolean biST,
+    FFXM_PARAMETER_IN FfxBoolean biUV,
+    FFXM_PARAMETER_IN FfxFloat16x2 lA,
+    FFXM_PARAMETER_IN FfxFloat16x2 lB,
+    FFXM_PARAMETER_IN FfxFloat16x2 lC,
+    FFXM_PARAMETER_IN FfxFloat16x2 lD,
+    FFXM_PARAMETER_IN FfxFloat16x2 lE)
+{
+    // Horizontal weights of the pair: (1 - pp.x, pp.x).
+    FfxFloat16x2 horzWeights = FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x);
+
+    // Combine with the vertical weight of the selected row; biUV overrides biST if both are set.
+    FfxFloat16x2 w = FFXM_BROADCAST_FLOAT16X2(0.0);
+    if (biST)
+    {
+        w = horzWeights * FFXM_BROADCAST_FLOAT16X2(FFXM_BROADCAST_FLOAT16(1.0) - pp.y);
+    }
+    if (biUV)
+    {
+        w = horzWeights * FFXM_BROADCAST_FLOAT16X2(pp.y);
+    }
+
+    // ABS is not free in the packed FP16 path.
+
+    // Horizontal term.
+    FfxFloat16x2 gradX = lD - lB;
+    FfxFloat16x2 rcpStepX = ffxReciprocalHalf(max(abs(lD - lC), abs(lC - lB)));
+    FfxFloat16x2 edgeX = ffxSaturate(abs(gradX) * rcpStepX);
+    dirPX += gradX * w;
+    lenP += (edgeX * edgeX) * w;
+
+    // Vertical term.
+    FfxFloat16x2 gradY = lE - lA;
+    FfxFloat16x2 rcpStepY = ffxReciprocalHalf(max(abs(lE - lC), abs(lC - lA)));
+    FfxFloat16x2 edgeY = ffxSaturate(abs(gradY) * rcpStepY);
+    dirPY += gradY * w;
+    lenP += (edgeY * edgeY) * w;
+}
+
+// Apply edge-aware spatial upsampling using packed 16-bit floating point calculations.
+// Follows the same steps as ffxFsrEasuFloat but processes taps in pairs through FsrEasuSetH / FsrEasuTapH.
+// The input image is read through the FsrEasuRH / FsrEasuGH / FsrEasuBH callbacks, which the including
+// shader must implement.
+//  pix  [out] The computed color of a pixel.
+//  ip   [in]  Integer pixel position within the output.
+//  con0 [in]  Scale (xy) and bias (zw) mapping ip to an input-space position, stored as float bits.
+//  con1 [in]  Scale (xy) and bias (zw) mapping the integer input position to the first fetch position.
+//  con2 [in]  Offsets from the first fetch position to the second (xy) and third (zw).
+//  con3 [in]  Offset from the first fetch position to the fourth (xy).
+void FsrEasuH(
+ FFXM_PARAMETER_OUT FfxFloat16x3 pix,
+ FFXM_PARAMETER_IN FfxUInt32x2 ip,
+ FFXM_PARAMETER_IN FfxUInt32x4 con0,
+ FFXM_PARAMETER_IN FfxUInt32x4 con1,
+ FFXM_PARAMETER_IN FfxUInt32x4 con2,
+ FFXM_PARAMETER_IN FfxUInt32x4 con3)
+{
+ // Position math stays in 32-bit; only the fractional part is converted to 16-bit (ppp).
+ FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+ FfxFloat32x2 fp = floor(pp);
+ pp -= fp;
+ FfxFloat16x2 ppp = FfxFloat16x2(pp);
+
+ // Fetch each color channel at the four positions; variable names list the taps held in .xyzw ('z' = unused).
+ FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+ FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy);
+ FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw);
+ FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy);
+ FfxFloat16x4 bczzR = FsrEasuRH(p0);
+ FfxFloat16x4 bczzG = FsrEasuGH(p0);
+ FfxFloat16x4 bczzB = FsrEasuBH(p0);
+ FfxFloat16x4 ijfeR = FsrEasuRH(p1);
+ FfxFloat16x4 ijfeG = FsrEasuGH(p1);
+ FfxFloat16x4 ijfeB = FsrEasuBH(p1);
+ FfxFloat16x4 klhgR = FsrEasuRH(p2);
+ FfxFloat16x4 klhgG = FsrEasuGH(p2);
+ FfxFloat16x4 klhgB = FsrEasuBH(p2);
+ FfxFloat16x4 zzonR = FsrEasuRH(p3);
+ FfxFloat16x4 zzonG = FsrEasuGH(p3);
+ FfxFloat16x4 zzonB = FsrEasuBH(p3);
+
+ // Approximate luma times 2: 0.5 * B + 0.5 * R + G.
+ FfxFloat16x4 bczzL = bczzB * FFXM_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFXM_BROADCAST_FLOAT16X4(0.5) + bczzG);
+ FfxFloat16x4 ijfeL = ijfeB * FFXM_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFXM_BROADCAST_FLOAT16X4(0.5) + ijfeG);
+ FfxFloat16x4 klhgL = klhgB * FFXM_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFXM_BROADCAST_FLOAT16X4(0.5) + klhgG);
+ FfxFloat16x4 zzonL = zzonB * FFXM_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFXM_BROADCAST_FLOAT16X4(0.5) + zzonG);
+ // Rename: one scalar luma per tap.
+ FfxFloat16 bL = bczzL.x;
+ FfxFloat16 cL = bczzL.y;
+ FfxFloat16 iL = ijfeL.x;
+ FfxFloat16 jL = ijfeL.y;
+ FfxFloat16 fL = ijfeL.z;
+ FfxFloat16 eL = ijfeL.w;
+ FfxFloat16 kL = klhgL.x;
+ FfxFloat16 lL = klhgL.y;
+ FfxFloat16 hL = klhgL.z;
+ FfxFloat16 gL = klhgL.w;
+ FfxFloat16 oL = zzonL.z;
+ FfxFloat16 nL = zzonL.w;
+
+ // This part is different, accumulating 2 taps in parallel.
+ // First call handles the (f, g) pair, second call the (j, k) pair.
+ FfxFloat16x2 dirPX = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FfxFloat16x2 dirPY = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FfxFloat16x2 lenP = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FsrEasuSetH(dirPX,
+ dirPY,
+ lenP,
+ ppp,
+ FfxBoolean(true),
+ FfxBoolean(false),
+ FfxFloat16x2(bL, cL),
+ FfxFloat16x2(eL, fL),
+ FfxFloat16x2(fL, gL),
+ FfxFloat16x2(gL, hL),
+ FfxFloat16x2(jL, kL));
+ FsrEasuSetH(dirPX,
+ dirPY,
+ lenP,
+ ppp,
+ FfxBoolean(false),
+ FfxBoolean(true),
+ FfxFloat16x2(fL, gL),
+ FfxFloat16x2(iL, jL),
+ FfxFloat16x2(jL, kL),
+ FfxFloat16x2(kL, lL),
+ FfxFloat16x2(nL, oL));
+ // Sum the two lanes to obtain the scalar direction and length.
+ FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g);
+ FfxFloat16 len = lenP.r + lenP.g;
+
+ // Normalize the direction; when its squared length is below 1/32768 (zro != 0) it is replaced
+ // by (1, dir.y) and left unscaled.
+ FfxFloat16x2 dir2 = dir * dir;
+ FfxFloat16 dirR = dir2.x + dir2.y;
+ FfxUInt32 zro = FfxUInt32(dirR < FFXM_BROADCAST_FLOAT16(1.0 / 32768.0));
+ dirR = ffxApproximateReciprocalSquareRootHalf(dirR);
+ dirR = (zro > 0) ? FFXM_BROADCAST_FLOAT16(1.0) : dirR;
+ dir.x = (zro > 0) ? FFXM_BROADCAST_FLOAT16(1.0) : dir.x;
+ dir *= FFXM_BROADCAST_FLOAT16X2(dirR);
+ // Halve and square the length, then derive the anisotropic scale (len2), the negative lobe
+ // strength (lob) and the distance^2 clipping point (clp).
+ len = len * FFXM_BROADCAST_FLOAT16(0.5);
+ len *= len;
+ FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y)));
+ FfxFloat16x2 len2 =
+ FfxFloat16x2(FFXM_BROADCAST_FLOAT16(1.0) + (stretch - FFXM_BROADCAST_FLOAT16(1.0)) * len, FFXM_BROADCAST_FLOAT16(1.0) + FFXM_BROADCAST_FLOAT16(-0.5) * len);
+ FfxFloat16 lob = FFXM_BROADCAST_FLOAT16(0.5) + FFXM_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len;
+ FfxFloat16 clp = ffxApproximateReciprocalHalf(lob);
+
+ // FP16 is different, using packed trick to do min and max in same operation.
+ // Over taps f, g, j and k: .x ends up as max(-v), i.e. -min(v), and .y as max(v).
+ FfxFloat16x2 bothR =
+ max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x)));
+ FfxFloat16x2 bothG =
+ max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x)));
+ FfxFloat16x2 bothB =
+ max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x)));
+
+ // This part is different for FP16, working pairs of taps at a time.
+ // Each call passes the X offsets and Y offsets of two taps, in the lane order of the color swizzle used.
+ FfxFloat16x2 pR = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FfxFloat16x2 pG = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FfxFloat16x2 pB = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FfxFloat16x2 pW = FFXM_BROADCAST_FLOAT16X2(0.0);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw);
+ FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw);
+ // Sum the two lanes of each accumulator.
+ FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y);
+ FfxFloat16 aW = pW.x + pW.y;
+
+ // Slightly different for FP16 version due to combined min and max.
+ // Normalize by the total weight and clamp to [-both.x, both.y], i.e. the min/max of the four nearest taps.
+ pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW))));
+}
+#endif // #if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FFXM_FSR_EASU_HALF)
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
+// The RCAS sharpener does not support scaling, as it should be applied after EASU scaling.
+// Pass EASU output straight into RCAS, no color conversions necessary.
+//------------------------------------------------------------------------------------------------------------------------------
+// RCAS is based on the following logic.
+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
+// w n
+// w 1 w for taps w m e
+// w s
+// Where 'w' is the negative lobe weight.
+// output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+// 0.25
+// 0.25 -1 0.25
+// 0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+// GLSL example for the required callbacks :
+//
+// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));}
+// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b)
+// {
+// //do any simple input color conversions here or leave empty if none needed
+// }
+//
+// FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+// Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+//
+// FfxUInt32x4 con;
+// FsrRcasCon(con,
+// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+// #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+// #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+//
+//  con       [out] con[0] receives the float32 bit pattern of the linear sharpness, con[1] the same value
+//                  packed as two halves via packHalf2x16, con[2] and con[3] are set to zero.
+//  sharpness [in]  The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the
+//                  reduction of sharpness}.
+//
+// 'con' carries FFXM_PARAMETER_INOUT, the same qualifier ffxFsrPopulateEasuConstants uses for its outputs.
+// Without it a shading language that passes vectors by value would discard every write below.
+// NOTE(review): assumes FFXM_PARAMETER_INOUT expands to nothing on the CPU path, as its use in
+// ffxFsrPopulateEasuConstants suggests - confirm against the core headers.
+FFXM_STATIC void FsrRcasCon(
+    FFXM_PARAMETER_INOUT FfxUInt32x4 con,
+    FfxFloat32 sharpness)
+{
+    // Transform from stops to linear value.
+    sharpness = exp2(-sharpness);
+
+    // Both halves of the packed constant carry the same sharpness.
+    FfxFloat32x2 hSharp = {sharpness, sharpness};
+    con[0] = ffxAsUInt32(sharpness);
+    con[1] = packHalf2x16(hSharp);
+    con[2] = 0;
+    con[3] = 0;
+}
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by the calling shader (FsrRcasF() calls both).
+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p); // Returns the input pixel at integer position 'p'.
+ void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b); // Optional in-place transform run on every loaded tap.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
+ out FfxFloat32 pixG,
+ out FfxFloat32 pixB,
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out FfxFloat32 pixA, // Receives the alpha of the center pixel unchanged.
+#endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con)
+ { // Sharpens the pixel at 'ip'; 'con' is the constant filled in by FsrRcasCon() (only con.x is read here).
+ // Algorithm uses minimal 3x3 pixel neighborhood.
+ // b
+ // d e f
+ // h
+ FfxInt32x2 sp = FfxInt32x2(ip);
+ FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
+ FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat32x4 ee = FsrRcasLoadF(sp);
+ FfxFloat32x3 e = ee.rgb;
+ pixA = ee.a;
+#else
+ FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
+#endif
+ FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
+ FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
+ // Rename (32-bit) or regroup (16-bit).
+ FfxFloat32 bR = b.r;
+ FfxFloat32 bG = b.g;
+ FfxFloat32 bB = b.b;
+ FfxFloat32 dR = d.r;
+ FfxFloat32 dG = d.g;
+ FfxFloat32 dB = d.b;
+ FfxFloat32 eR = e.r;
+ FfxFloat32 eG = e.g;
+ FfxFloat32 eB = e.b;
+ FfxFloat32 fR = f.r;
+ FfxFloat32 fG = f.g;
+ FfxFloat32 fB = f.b;
+ FfxFloat32 hR = h.r;
+ FfxFloat32 hG = h.g;
+ FfxFloat32 hB = h.b;
+ // Run optional input transform (shader-supplied callback) on all five taps.
+ FsrRcasInputF(bR, bG, bB);
+ FsrRcasInputF(dR, dG, dB);
+ FsrRcasInputF(eR, eG, eB);
+ FsrRcasInputF(fR, fG, fB);
+ FsrRcasInputF(hR, hG, hB);
+ // Luma times 2, approximated as 0.5*R + G + 0.5*B.
+ FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
+ FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
+ FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
+ FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
+ FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
+ // Noise detection: center luma deviation from the ring average, relative to the local luma range.
+ FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
+ nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
+ nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0); // Maps the saturated {0 to 1} value to an attenuation factor {1 to 0.5}.
+ // Min and max of ring (the four neighbors, center excluded).
+ FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
+ FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
+ FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
+ FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
+ FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
+ FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
+ // Immediate constants for peak range.
+ FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
+ // Limiters, these need to be high precision RCPs.
+ FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
+ FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
+ FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
+ FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
+ FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
+ FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+ FfxFloat32 lobeR = max(-hitMinR, hitMaxR);
+ FfxFloat32 lobeG = max(-hitMinG, hitMaxG);
+ FfxFloat32 lobeB = max(-hitMinB, hitMaxB);
+ FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
+ (con.x); // Lobe is clamped to {-FSR_RCAS_LIMIT to 0}, then scaled by the sharpness stored in con.x.
+ // Apply noise removal.
+#ifdef FSR_RCAS_DENOISE
+ lobe *= nz;
+#endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
+ pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+ pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+ pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+ return;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU) && FFXM_HALF == 1 && defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by the calling shader (FsrRcasH() calls both).
+ FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p); // Returns the input pixel at integer position 'p'.
+ void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b); // Optional in-place transform run on every loaded tap.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasH( // 16-bit variant of the RCAS sharpening filter for one output pixel.
+ out FfxFloat16 pixR, // Output values, non-vector so porting between FsrRcasF() and FsrRcasH() is easy.
+ out FfxFloat16 pixG,
+ out FfxFloat16 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out FfxFloat16 pixA, // Receives the alpha of the center pixel unchanged.
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constant filled in by FsrRcasCon(); only the packed-half value in con.y is read here.
+ // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
+ // b
+ // d e f
+ // h
+ FfxInt16x2 sp=FfxInt16x2(ip);
+ FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
+ FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat16x4 ee=FsrRcasLoadH(sp);
+ FfxFloat16x3 e=ee.rgb;pixA=ee.a;
+ #else
+ FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
+ #endif
+ FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
+ FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
+ // Rename (32-bit) or regroup (16-bit).
+ FfxFloat16 bR=b.r;
+ FfxFloat16 bG=b.g;
+ FfxFloat16 bB=b.b;
+ FfxFloat16 dR=d.r;
+ FfxFloat16 dG=d.g;
+ FfxFloat16 dB=d.b;
+ FfxFloat16 eR=e.r;
+ FfxFloat16 eG=e.g;
+ FfxFloat16 eB=e.b;
+ FfxFloat16 fR=f.r;
+ FfxFloat16 fG=f.g;
+ FfxFloat16 fB=f.b;
+ FfxFloat16 hR=h.r;
+ FfxFloat16 hG=h.g;
+ FfxFloat16 hB=h.b;
+ // Run optional input transform (shader-supplied callback) on all five taps.
+ FsrRcasInputH(bR,bG,bB);
+ FsrRcasInputH(dR,dG,dB);
+ FsrRcasInputH(eR,eG,eB);
+ FsrRcasInputH(fR,fG,fB);
+ FsrRcasInputH(hR,hG,hB);
+ // Luma times 2, approximated as 0.5*R + G + 0.5*B.
+ FfxFloat16 bL=bB*FFXM_BROADCAST_FLOAT16(0.5)+(bR*FFXM_BROADCAST_FLOAT16(0.5)+bG);
+ FfxFloat16 dL=dB*FFXM_BROADCAST_FLOAT16(0.5)+(dR*FFXM_BROADCAST_FLOAT16(0.5)+dG);
+ FfxFloat16 eL=eB*FFXM_BROADCAST_FLOAT16(0.5)+(eR*FFXM_BROADCAST_FLOAT16(0.5)+eG);
+ FfxFloat16 fL=fB*FFXM_BROADCAST_FLOAT16(0.5)+(fR*FFXM_BROADCAST_FLOAT16(0.5)+fG);
+ FfxFloat16 hL=hB*FFXM_BROADCAST_FLOAT16(0.5)+(hR*FFXM_BROADCAST_FLOAT16(0.5)+hG);
+ // Noise detection: center luma deviation from the ring average, relative to the local luma range.
+ FfxFloat16 nz=FFXM_BROADCAST_FLOAT16(0.25)*bL+FFXM_BROADCAST_FLOAT16(0.25)*dL+FFXM_BROADCAST_FLOAT16(0.25)*fL+FFXM_BROADCAST_FLOAT16(0.25)*hL-eL;
+ nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+ nz=FFXM_BROADCAST_FLOAT16(-0.5)*nz+FFXM_BROADCAST_FLOAT16(1.0); // Maps the saturated {0 to 1} value to an attenuation factor {1 to 0.5}.
+ // Min and max of ring (the four neighbors, center excluded).
+ FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+ FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+ FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+ FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+ FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+ FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+ // Immediate constants for peak range.
+ FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+ // Limiters, these need to be high precision RCPs.
+ FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4R);
+ FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4G);
+ FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mx4B);
+ FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
+ FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
+ FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
+ FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
+ FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
+ FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
+ FfxFloat16 lobe=max(FFXM_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFXM_BROADCAST_FLOAT16(0.0)))*FFXM_UINT32_TO_FLOAT16X2(con.y).x; // Clamped to {-FSR_RCAS_LIMIT to 0}, then scaled by the sharpness in con.y.
+ // Apply noise removal.
+ #ifdef FSR_RCAS_DENOISE
+ lobe*=nz;
+ #endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFXM_BROADCAST_FLOAT16(4.0)*lobe+FFXM_BROADCAST_FLOAT16(1.0));
+ pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+ pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+ pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+// PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1 && defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader (FsrRcasHx2() calls both).
+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p); // Returns the input pixel at integer position 'p'.
+ void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b); // Optional in-place transform run on every packed tap pair.
+//------------------------------------------------------------------------------------------------------------------------------
+ // Unpacks a two-pixel Structure-of-Arrays result (pixR/pixG/pixB) into one Array-of-Structures value per pixel for store.
+ void FsrRcasDepackHx2(out FfxFloat16x4 pix0, out FfxFloat16x4 pix1, FfxFloat16x2 pixR, FfxFloat16x2 pixG, FfxFloat16x2 pixB) {
+ #ifdef FFXM_HLSL
+ // DX only: it won't allow uninitialized values, so alpha is zeroed explicitly (slower path).
+ pix0.a = 0.0; pix1.a = 0.0;
+ #endif
+ pix0.rgb = FfxFloat16x3(pixR.x, pixG.x, pixB.x); // .x lanes form the first pixel.
+ pix1.rgb = FfxFloat16x3(pixR.y, pixG.y, pixB.y); } // .y lanes form the second pixel.
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasHx2( // Packed 16-bit RCAS: sharpens the pixel at 'ip' and the pixel 8 columns to its right in one call.
+ // Output values are for 2 8x8 tiles in a 16x8 region.
+ // pix.x = left 8x8 tile
+ // pix.y = right 8x8 tile
+ // This enables later processing to easily be packed as well.
+ out FfxFloat16x2 pixR,
+ out FfxFloat16x2 pixG,
+ out FfxFloat16x2 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ out FfxFloat16x2 pixA, // Receives the alpha of both center pixels unchanged.
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constant filled in by FsrRcasCon(); only the packed-half value in con.y is read here.
+ // Sharpening algorithm uses minimal 3x3 pixel neighborhood, loaded once per tile.
+ FfxInt16x2 sp0=FfxInt16x2(ip);
+ FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
+ FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
+ FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
+ #else
+ FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
+ #endif
+ FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
+ FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
+ FfxInt16x2 sp1=sp0+FfxInt16x2(8,0); // Second pixel sits 8 columns to the right (the right 8x8 tile).
+ FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
+ FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+ FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
+ FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
+ #else
+ FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
+ #endif
+ FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
+ FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
+ // Arrays of Structures to Structures of Arrays conversion (.x = first pixel, .y = second pixel).
+ FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
+ FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
+ FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
+ FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
+ FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
+ FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
+ FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
+ FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
+ FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
+ FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
+ FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
+ FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
+ FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
+ FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
+ FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
+ // Run optional input transform (shader-supplied callback) on all five packed taps.
+ FsrRcasInputHx2(bR,bG,bB);
+ FsrRcasInputHx2(dR,dG,dB);
+ FsrRcasInputHx2(eR,eG,eB);
+ FsrRcasInputHx2(fR,fG,fB);
+ FsrRcasInputHx2(hR,hG,hB);
+ // Luma times 2, approximated as 0.5*R + G + 0.5*B.
+ FfxFloat16x2 bL=bB*FFXM_BROADCAST_FLOAT16X2(0.5)+(bR*FFXM_BROADCAST_FLOAT16X2(0.5)+bG);
+ FfxFloat16x2 dL=dB*FFXM_BROADCAST_FLOAT16X2(0.5)+(dR*FFXM_BROADCAST_FLOAT16X2(0.5)+dG);
+ FfxFloat16x2 eL=eB*FFXM_BROADCAST_FLOAT16X2(0.5)+(eR*FFXM_BROADCAST_FLOAT16X2(0.5)+eG);
+ FfxFloat16x2 fL=fB*FFXM_BROADCAST_FLOAT16X2(0.5)+(fR*FFXM_BROADCAST_FLOAT16X2(0.5)+fG);
+ FfxFloat16x2 hL=hB*FFXM_BROADCAST_FLOAT16X2(0.5)+(hR*FFXM_BROADCAST_FLOAT16X2(0.5)+hG);
+ // Noise detection: center luma deviation from the ring average, relative to the local luma range.
+ FfxFloat16x2 nz=FFXM_BROADCAST_FLOAT16X2(0.25)*bL+FFXM_BROADCAST_FLOAT16X2(0.25)*dL+FFXM_BROADCAST_FLOAT16X2(0.25)*fL+FFXM_BROADCAST_FLOAT16X2(0.25)*hL-eL;
+ nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+ nz=FFXM_BROADCAST_FLOAT16X2(-0.5)*nz+FFXM_BROADCAST_FLOAT16X2(1.0); // Maps the saturated {0 to 1} value to an attenuation factor {1 to 0.5}.
+ // Min and max of ring (the four neighbors, center excluded).
+ FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+ FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+ FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+ FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+ FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+ FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+ // Immediate constants for peak range.
+ FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+ // Limiters, these need to be high precision RCPs.
+ FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4R);
+ FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4G);
+ FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mx4B);
+ FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
+ FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
+ FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
+ FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
+ FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
+ FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
+ FfxFloat16x2 lobe=max(FFXM_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFXM_BROADCAST_FLOAT16X2(0.0)))*FFXM_BROADCAST_FLOAT16X2(FFXM_UINT32_TO_FLOAT16X2(con.y).x); // Clamped to {-FSR_RCAS_LIMIT to 0}, then scaled by the sharpness in con.y.
+ // Apply noise removal.
+ #ifdef FSR_RCAS_DENOISE
+ lobe*=nz;
+ #endif
+ // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+ FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFXM_BROADCAST_FLOAT16X2(4.0)*lobe+FFXM_BROADCAST_FLOAT16X2(1.0));
+ pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+ pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+ pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+// FsrLfga*(
+// color, // In/out linear colorspace color {0 to 1} ranged.
+// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+// amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+ // Adds grain 't' scaled by 'a' to 'c'; maximum grain is the minimum distance to the signal limit.
+ void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) {
+ FfxFloat32x3 headroom = ffxMin(ffxBroadcast3(1.0) - c, c); // Distance from 'c' to the nearer of 0 and 1.
+ c += (t * ffxBroadcast3(a)) * headroom;
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1
+ // Half precision version (slower); grain is limited by the distance to the nearer signal limit.
+ void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) {
+ FfxFloat16x3 headroom = min(FFXM_BROADCAST_FLOAT16X3(1.0) - c, c); // Distance from 'c' to the nearer of 0 and 1.
+ c += (t * FFXM_BROADCAST_FLOAT16X3(a)) * headroom;
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Packed half precision version (faster); applies the same limit-aware grain to each packed channel pair.
+ void FsrLfgaHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 tR, FfxFloat16x2 tG, FfxFloat16x2 tB, FfxFloat16 a) {
+ cR += (tR * FFXM_BROADCAST_FLOAT16X2(a)) * min(FFXM_BROADCAST_FLOAT16X2(1.0) - cR, cR); cG += (tG * FFXM_BROADCAST_FLOAT16X2(a)) * min(FFXM_BROADCAST_FLOAT16X2(1.0) - cG, cG); cB += (tB * FFXM_BROADCAST_FLOAT16X2(a)) * min(FFXM_BROADCAST_FLOAT16X2(1.0) - cB, cB); }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+ void FsrSrtmF(inout FfxFloat32x3 c) {
+ FfxFloat32 peak = ffxMax3(c.r, c.g, c.b); // One scale from the largest channel is applied to all three channels.
+ c *= ffxBroadcast3(rcp(peak + FfxFloat32(1.0)));
+ }
+ // Scales 'c' by 1/(1 - max channel); the extra max keeps the divisor at or above 1/32768, which solves the c=1.0 case (a /0).
+ void FsrSrtmInvF(inout FfxFloat32x3 c) { FfxFloat32 divisor = max(FfxFloat32(1.0 / 32768.0), FfxFloat32(1.0) - ffxMax3(c.r, c.g, c.b)); c *= ffxBroadcast3(rcp(divisor)); }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU )&& FFXM_HALF == 1
+ void FsrSrtmH(inout FfxFloat16x3 c) {
+ FfxFloat16 peak = ffxMax3Half(c.r, c.g, c.b); // One scale from the largest channel is applied to all three channels.
+ c *= FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(peak + FFXM_BROADCAST_FLOAT16(1.0)));
+ }
+ void FsrSrtmInvH(inout FfxFloat16x3 c) {
+ FfxFloat16 divisor = max(FFXM_BROADCAST_FLOAT16(1.0 / 32768.0), FFXM_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b)); // Floor avoids /0 at c=1.0. NOTE(review): 1/32768 is a subnormal half - confirm it is not flushed to zero.
+ c *= FFXM_BROADCAST_FLOAT16X3(ffxReciprocalHalf(divisor));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) {
+ // Per-pixel scale 1/(max(R,G,B)+1), computed for both packed pixels at once.
+ FfxFloat16x2 scale = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFXM_BROADCAST_FLOAT16X2(1.0));
+ cR *= scale;
+ cG *= scale;
+ cB *= scale;
+ }
+ void FsrSrtmInvHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) {
+ // Per-pixel scale 1/(1-max(R,G,B)); the divisor is floored at 1/32768 to avoid a /0 at 1.0.
+ FfxFloat16x2 scale = ffxReciprocalHalf(max(FFXM_BROADCAST_FLOAT16X2(1.0 / 32768.0), FFXM_BROADCAST_FLOAT16X2(1.0) - ffxMax3Half(cR, cG, cB)));
+ cR *= scale;
+ cG *= scale;
+ cB *= scale;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
+// Given good non-biased temporal blue noise as dither input,
+// the output dither will temporally conserve energy.
+// This is done by choosing the linear nearest step point instead of perceptual nearest.
+// See code below for details.
+//------------------------------------------------------------------------------------------------------------------------------
+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
+// ===============================================
+// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'.
+// - Thus rounding is to nearest.
+// - NaN gets converted to zero.
+// - INF is clamped to {0.0 to 1.0}.
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
+ // Only 32-bit has enough precision for this computation.
+ // Output is {0 to <1}.
+ FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
+ {
+ // The 1.61803 golden ratio; scales the column after 'f' has been added to it.
+ FfxFloat32 golden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+ // Number designed to provide a good visual pattern; scales the row.
+ FfxFloat32 rowScale = FfxFloat32(1.0 / 3.69);
+ FfxFloat32 column = FfxFloat32(p.x + f);
+ FfxFloat32 row = FfxFloat32(p.y);
+ // Only the fractional part is returned.
+ return ffxFract(column * golden + (row * rowScale));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 8-bit gamma 2.0: 'dit' picks between the two nearest 1/255 steps around sqrt(c).
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+ FfxFloat32x3 stepSize = ffxBroadcast3(1.0 / 255.0);
+ // Step at or below sqrt(c).
+ FfxFloat32x3 lowStep = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * stepSize;
+ // Squares of that step and of the next step up.
+ FfxFloat32x3 lowLinear = lowStep * lowStep;
+ FfxFloat32x3 highLinear = (lowStep + stepSize) * (lowStep + stepSize);
+ // Ratio of 'lowLinear' to 'highLinear' required to produce 'c'.
+ // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges).
+ // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL.
+ FfxFloat32x3 ratio = (c - highLinear) * ffxApproximateReciprocalMedium(lowLinear - highLinear);
+ // Use the ratio as a cutoff to choose the lower or upper step; ffxIsGreaterThanZero() is a MUL.
+ c = ffxSaturate(lowStep + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * stepSize);
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 10-bit gamma 2.0: 'dit' picks between the two nearest 1/1023 steps around sqrt(c).
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+ FfxFloat32x3 stepSize = ffxBroadcast3(1.0 / 1023.0);
+ FfxFloat32x3 lowStep = floor(ffxSqrt(c) * ffxBroadcast3(1023.0)) * stepSize;
+ FfxFloat32x3 lowLinear = lowStep * lowStep;
+ FfxFloat32x3 highLinear = (lowStep + stepSize) * (lowStep + stepSize);
+ // Ratio of 'lowLinear' to 'highLinear' required to produce 'c', used as the dither cutoff.
+ FfxFloat32x3 ratio = (c - highLinear) * ffxApproximateReciprocalMedium(lowLinear - highLinear);
+ c = ffxSaturate(lowStep + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * stepSize);
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFXM_GPU)&& FFXM_HALF == 1
+ FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
+ {
+ // Same pattern as FsrTepdDitF(): evaluated in 32-bit, only the result is converted to half.
+ FfxFloat32 golden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+ FfxFloat32 rowScale = FfxFloat32(1.0 / 3.69);
+ FfxFloat32 column = FfxFloat32(p.x + f);
+ FfxFloat32 row = FfxFloat32(p.y);
+ return FfxFloat16(ffxFract(column * golden + (row * rowScale)));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit)
+ {
+ FfxFloat16x3 stepSize = FFXM_BROADCAST_FLOAT16X3(1.0 / 255.0);
+ FfxFloat16x3 lowStep = floor(sqrt(c) * FFXM_BROADCAST_FLOAT16X3(255.0)) * stepSize;
+ FfxFloat16x3 lowLinear = lowStep * lowStep;
+ FfxFloat16x3 highLinear = (lowStep + stepSize) * (lowStep + stepSize);
+ // Ratio of 'lowLinear' to 'highLinear' required to produce 'c', used as the dither cutoff.
+ FfxFloat16x3 ratio = (c - highLinear) * ffxApproximateReciprocalMediumHalf(lowLinear - highLinear);
+ c = ffxSaturate(lowStep + ffxIsGreaterThanZeroHalf(FFXM_BROADCAST_FLOAT16X3(dit) - ratio) * stepSize);
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
+ {
+ FfxFloat16x3 stepSize = FFXM_BROADCAST_FLOAT16X3(1.0 / 1023.0);
+ FfxFloat16x3 lowStep = floor(sqrt(c) * FFXM_BROADCAST_FLOAT16X3(1023.0)) * stepSize;
+ FfxFloat16x3 lowLinear = lowStep * lowStep;
+ FfxFloat16x3 highLinear = (lowStep + stepSize) * (lowStep + stepSize);
+ // Ratio of 'lowLinear' to 'highLinear' required to produce 'c', used as the dither cutoff.
+ FfxFloat16x3 ratio = (c - highLinear) * ffxApproximateReciprocalMediumHalf(lowLinear - highLinear);
+ c = ffxSaturate(lowStep + ffxIsGreaterThanZeroHalf(FFXM_BROADCAST_FLOAT16X3(dit) - ratio) * stepSize);
+ }
+ //==============================================================================================================================
+ // This computes dither for positions 'p' and 'p+{8,0}'; the math runs in 32-bit and only the result is converted to half.
+ FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
+ {
+ FfxFloat32 golden = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+ FfxFloat32 rowScale = FfxFloat32(1.0 / 3.69);
+ FfxFloat32 column0 = FfxFloat32(p.x + f);
+ // Second lane is the column 8 to the right of the first.
+ FfxFloat32x2 columns = FfxFloat32x2(column0, column0 + FfxFloat32(8.0));
+ FfxFloat32 row = FfxFloat32(p.y);
+ FfxFloat32x2 phase = columns * ffxBroadcast2(golden) + ffxBroadcast2(row * rowScale);
+ return FfxFloat16x2(ffxFract(phase));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+ { // Packed 8-bit dither: each channel holds two pixels, 'dit' holds one dither value per pixel.
+ FfxFloat16x2 nR = sqrt(cR);
+ FfxFloat16x2 nG = sqrt(cG);
+ FfxFloat16x2 nB = sqrt(cB);
+ nR = floor(nR * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0); // 1/255 step at or below sqrt(c).
+ nG = floor(nG * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+ nB = floor(nB * FFXM_BROADCAST_FLOAT16X2(255.0)) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+ FfxFloat16x2 aR = nR * nR; // Square of the lower step.
+ FfxFloat16x2 aG = nG * nG;
+ FfxFloat16x2 aB = nB * nB;
+ FfxFloat16x2 bR = nR + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0); // Next step up ...
+ bR = bR * bR; // ... and its square.
+ FfxFloat16x2 bG = nG + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+ bG = bG * bG;
+ FfxFloat16x2 bB = nB + FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0);
+ bB = bB * bB;
+ FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); // Ratio of 'a' to 'b' required to produce 'c'.
+ FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
+ FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
+ cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0)); // Ratio is the cutoff that selects the lower or upper step.
+ cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0));
+ cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFXM_BROADCAST_FLOAT16X2(1.0 / 255.0));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ // Packed 10-bit dither: each channel holds two pixels, 'dit' holds one dither value per pixel.
+ FfxFloat16x2 nR=sqrt(cR);
+ FfxFloat16x2 nG=sqrt(cG);
+ FfxFloat16x2 nB=sqrt(cB);
+ nR=floor(nR*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0); // 1/1023 step at or below sqrt(c).
+ nG=floor(nG*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);
+ nB=floor(nB*FFXM_BROADCAST_FLOAT16X2(1023.0))*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);
+ FfxFloat16x2 aR=nR*nR; // Square of the lower step.
+ FfxFloat16x2 aG=nG*nG;
+ FfxFloat16x2 aB=nB*nB;
+ FfxFloat16x2 bR=nR+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR; // Square of the next step up.
+ FfxFloat16x2 bG=nG+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG;
+ FfxFloat16x2 bB=nB+FFXM_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB;
+ FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); // Ratio of 'a' to 'b' required to produce 'c'.
+ FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG);
+ FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB);
+ cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0)); // Ratio is the cutoff that selects the lower or upper step.
+ cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFXM_BROADCAST_FLOAT16X2(1.0/1023.0));
+ cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFXM_BROADCAST_FLOAT16X2(1.0 / 1023.0));
+}
+#endif
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta
new file mode 100644
index 0000000..0ceaf34
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr1/ffxm_fsr1.h.meta
@@ -0,0 +1,76 @@
+fileFormatVersion: 2
+guid: beffdc3cffeabd84491ac83b32a4d9f8
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Android: Android
+ second:
+ enabled: 0
+ settings:
+ AndroidSharedLibraryType: Executable
+ CPU: ARMv7
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ CPU: AnyCPU
+ DefaultValueInitialized: true
+ OS: AnyOS
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta
new file mode 100644
index 0000000..1656967
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 8001d8b2af47f59409d886267a5ce04b
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h
new file mode 100644
index 0000000..35769f0
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h
@@ -0,0 +1,379 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_ACCUMULATE_H
+#define FFXM_FSR2_ACCUMULATE_H
+
+struct AccumulateOutputs // Per-pixel results returned by Accumulate(FfxInt32x2).
+{
+#if !FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+ FfxFloat32x4 fColorAndWeight; // xyz = accumulated color, w = temporal reactive factor
+#else
+ FfxFloat32x3 fUpscaledColor; // accumulated color
+ FfxFloat32 fTemporalReactive; // temporal reactive factor, kept separate from the color in this variant
+#endif
+ FfxFloat32x2 fLockStatus; // assigned by FinalizeLockStatus()
+ FfxFloat32x4 fLumaHistory; // assigned by ComputeLumaInstabilityFactor() when that path is compiled in
+#if (FFXM_FSR2_OPTION_APPLY_SHARPENING == 0)
+ FfxFloat32x3 fColor; // copy of the accumulated color; only present when sharpening is disabled
+#endif
+};
+
+FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) // Returns the length of fMotionVector after scaling it component-wise by DisplaySize().
+{
+ return length(fMotionVector * DisplaySize());
+}
+#if FFXM_HALF
+FFXM_MIN16_F GetPxHrVelocity(FFXM_MIN16_F2 fMotionVector) // FFXM_HALF overload: same computation with DisplaySize() converted to FFXM_MIN16_F2 first.
+{
+ return length(fMotionVector * FFXM_MIN16_F2(DisplaySize()));
+}
+#endif
+
+void Accumulate(const AccumulationPassCommonParams params, FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFXM_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) // Blends the upsampled color (.xyz) into fHistoryColor with weight w / (fAccumulation + w); params is not read in this body.
+{
+ // Avoid invalid values when accumulation and upsampled weight are 0
+ fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); // fAccumulation is passed by value, so this only affects the local copy
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+ fHistoryColor = Tonemap(fHistoryColor); // only the history is tonemapped in this configuration
+#else
+ //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation)
+ fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
+ fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
+#endif
+#endif
+
+ const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; // blend weight of the new sample
+ fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);
+
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+ fHistoryColor = YCoCgToRGB(fHistoryColor); // the blend above ran on YCoCg values in this configuration
+#endif
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ fHistoryColor = InverseTonemap(fHistoryColor); // undo the Tonemap() applied before the blend
+#endif
+}
+
+#if FFXM_HALF
+void RectifyHistory( // FFXM_HALF signature: identical except that clippingBox is a RectificationBoxMin16
+ const AccumulationPassCommonParams params,
+ RectificationBoxMin16 clippingBox,
+ FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
+ FFXM_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
+ FfxFloat32 fLockContributionThisFrame,
+ FfxFloat32 fTemporalReactiveFactor, // not read in the function body
+ FfxFloat32 fLumaInstabilityFactor)
+#else
+void RectifyHistory(
+ const AccumulationPassCommonParams params,
+ RectificationBox clippingBox,
+ FFXM_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
+ FFXM_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
+ FfxFloat32 fLockContributionThisFrame,
+ FfxFloat32 fTemporalReactiveFactor, // not read in the function body
+ FfxFloat32 fLumaInstabilityFactor)
+#endif
+{ // When fHistoryColor lies outside the scaled clipping box, pulls it toward the clamped value and lowers fAccumulation.
+ FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f)); // min(20, (1 / |x * y|)^3); length() of a scalar is its absolute value
+
+ const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
+ const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor));
+ FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT); // fScaleFactorInfluence when fBoxScaleT is 0, 1.0 when it is 1
+
+ FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
+ FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
+ FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
+ FfxFloat32x3 boxCenter = clippingBox.boxCenter; // not referenced again in this function
+ FfxFloat32 boxVecSize = length(clippingBox.boxVec); // not referenced again in this function
+
+ boxMin = ffxMax(clippingBox.aabbMin, boxMin); // the scaled box never extends past the AABB stored in clippingBox
+ boxMax = ffxMin(clippingBox.aabbMax, boxMax);
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+ boxMin = InverseTonemap(boxMin);
+ boxMax = InverseTonemap(boxMax);
+#endif
+
+ if (any(FFXM_GREATER_THAN(boxMin, fHistoryColor)) || any(FFXM_GREATER_THAN(fHistoryColor, boxMax))) { // at least one channel is outside [boxMin, boxMax]
+
+ const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);
+
+ FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
+
+ const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
+ const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); // 1 - sqrt(fReactiveFactor)
+ fHistoryContribution *= fReactiveContribution;
+
+ // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
+ fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); // contribution 0 -> fully clamped, 1 -> history kept as is
+
+ // Scale accumulation using rectification info
+ const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFXM_BROADCAST_FLOAT32X3(0.1f)); // accumulation capped at 0.1
+ fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
+ }
+}
+
+void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight, FFXM_PARAMETER_INOUT AccumulateOutputs result) // Kills or ages the lock and stores it in result.fLockStatus; fLockStatus is passed by value, so the caller's copy is not modified.
+{
+ // we expect similar motion for next frame
+ // kill lock if that location is outside screen, avoid locks to be clamped to screen borders
+ FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; // current UV minus this frame's motion vector
+ if (IsUvInside(fEstimatedUvNextFrame) == false) {
+ KillLock(fLockStatus);
+ }
+ else {
+ // Decrease lock lifetime
+ const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
+ const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); // fUpsampledWeight relative to JitterSequenceLength() * fAverageLanczosWeightPerFrame
+ fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); // remaining lifetime never drops below 0
+ }
+
+ result.fLockStatus = fLockStatus;
+}
+
+
+FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) // Returns the starting accumulation weight replicated to three components; lockState is not read in this body.
+{
+ // Always assume max accumulation was reached
+ FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); // 0 when bIsExistingSample is false; scaled down by the reactive and depth-clip factors
+
+ fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); // ffxMin(x, lerp(x, y, t)) can only lower x: moves toward fUpsampledWeight * 10 when in motion last frame or as fHrVelocity * 10 saturates
+
+ fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); // moves further toward fUpsampledWeight as fHrVelocity / 20 approaches 1
+
+ return fBaseAccumulation.xxx;
+}
+
+#if FFXM_HALF
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBoxMin16 clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff, FFXM_PARAMETER_INOUT AccumulateOutputs result)
+#else
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff, FFXM_PARAMETER_INOUT AccumulateOutputs result)
+#endif
+{ // Compares the current luma with a 4-entry luma history, writes the shifted history to result.fLumaHistory and returns the instability factor.
+ const FfxFloat32 fUnormThreshold = 1.0f / 255.0f; // one step of an 8-bit UNORM value
+ const FfxInt32 N_MINUS_1 = 0; // history indices: component 0 holds the most recent entry, component 3 the oldest
+ const FfxInt32 N_MINUS_2 = 1;
+ const FfxInt32 N_MINUS_3 = 2;
+ const FfxInt32 N_MINUS_4 = 3;
+
+ FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; // .x of the box center is taken as the current luma (presumably the Y channel of YCoCg - confirm)
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+ fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); // x / (1 + max(0, x))
+#endif
+
+ fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; // quantize to 1/255 steps
+
+ const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); // all three factors below 0.1 and not a new sample
+ FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFXM_BROADCAST_FLOAT32X4(0.0f); // all-zero history when sampling is skipped
+
+ FfxFloat32 fLumaInstability = 0.0f;
+ FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
+
+ FfxFloat32 fMin = abs(fDiffs0);
+
+ if (fMin >= fUnormThreshold) { // only differences of at least one 1/255 step are examined
+ for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
+ FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
+
+ if (sign(fDiffs0) == sign(fDiffs1)) { // older entry differs from the current luma in the same direction as the newest entry
+
+ // Scale difference to protect historically similar values
+ const FfxFloat32 fMinBias = 1.0f;
+ fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
+ }
+ }
+
+ const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
+ const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
+
+ fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor; // non-zero only if an older entry was closer to the current luma than the newest one
+ fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold); // binarize to 0 or 1
+
+ fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f)); // attenuated by the accumulation mask or reactive factor, whichever term is larger
+ }
+
+ //shift history
+ fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
+ fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
+ fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
+ fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
+
+ result.fLumaHistory = fCurrentFrameLumaHistory;
+
+ return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); // forced to 0 while the oldest slot (after the shift) is still exactly 0
+}
+
+FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) // Derives the temporal reactive factor to output; the result is negated when saturate(fHrVelocity * 10) reaches 1.
+{
+ FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor); // capped at 0.99
+
+ fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); // raised toward 0.4 as fHrVelocity approaches 1; never lowered
+
+ fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); // squared, but at least 0.1 * depth clip and at least the dilated reactive factor
+
+ // Force reactive factor for new samples
+ fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor;
+
+ if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) {
+ fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f; // magnitude is at least FSR2_EPSILON before negation (presumably so the sign survives storage - confirm)
+ }
+
+ return fNewFactor;
+}
+
+void initReactiveMaskFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) // Fills params.fDilatedReactiveFactor and params.fAccumulationMask from one SampleDilatedReactiveMasks() lookup.
+{
+ const FFXM_MIN16_F2 fDilatedReactiveMasks = FFXM_MIN16_F2(SampleDilatedReactiveMasks(params.fLrUv_HwSampler));
+ params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; // x component: dilated reactive factor
+ params.fAccumulationMask = fDilatedReactiveMasks.y; // y component: accumulation mask
+}
+
+void initDepthClipFactors(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) // Fills params.fDepthClipFactor with the saturated SampleDepthClip() value at params.fLrUv_HwSampler.
+{
+ params.fDepthClipFactor = FFXM_MIN16_F(ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)));
+}
+
+void initIsNewSample(FFXM_PARAMETER_INOUT AccumulationPassCommonParams params) // Sets params.bIsNewSample; reads params.bIsExistingSample, so that field must already be populated.
+{
+ const FfxBoolean bIsResetFrame = (0 == FrameIndex()); // frame index 0 is treated as a reset frame
+ params.bIsNewSample = (params.bIsExistingSample == false || bIsResetFrame);
+}
+
+
+AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) // Builds the common parameter block for one pixel position; the reactive, depth-clip and new-sample fields are not set here.
+{
+ AccumulationPassCommonParams params = (AccumulationPassCommonParams)0; // start from an all-zero struct
+
+ params.iPxHrPos = iPxHrPos;
+ const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); // pixel center divided by DisplaySize()
+ params.fHrUv = fHrUv;
+
+ const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); // UV offset by Jitter() divided by RenderSize()
+ params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
+
+ params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
+ params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
+
+ ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); // presumably writes fReprojectedHrUv and bIsExistingSample as outputs - confirm against its declaration
+
+ return params;
+}
+
+AccumulateOutputs Accumulate(FfxInt32x2 iPxHrPos) // Accumulation pass for one pixel position: reprojects history, updates locks, rectifies and blends, and returns the per-pixel results.
+{
+ AccumulationPassCommonParams params = InitParams(iPxHrPos);
+
+ FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
+ FFXM_MIN16_F2 fLockStatus;
+ InitializeNewLockSample(fLockStatus);
+
+ FFXM_MIN16_F fTemporalReactiveFactor = FFXM_MIN16_F(0.0f);
+ FfxBoolean bInMotionLastFrame = FFXM_FALSE;
+ LockState lockState = { FFXM_FALSE , FFXM_FALSE };
+ const FfxBoolean bIsResetFrame = (0 == FrameIndex());
+ if (params.bIsExistingSample && !bIsResetFrame) { // history is only reprojected for existing samples on non-reset frames; otherwise the defaults above are kept
+ ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
+ lockState = ReprojectHistoryLockStatus(params, fLockStatus);
+ }
+
+ initReactiveMaskFactors(params);
+ initDepthClipFactors(params);
+
+ FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); // larger of the dilated mask value and the reprojected temporal factor
+
+ FfxFloat32 fLuminanceDiff = 0.0f;
+ FfxFloat32 fLockContributionThisFrame = 0.0f;
+ FfxFloat32x2 fLockStatus32 = {fLockStatus.x, fLockStatus.y}; // 32-bit copy handed to UpdateLockStatus, converted back below
+ UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus32, fLockContributionThisFrame, fLuminanceDiff);
+ fLockStatus = FFXM_MIN16_F2(fLockStatus32);
+
+#ifdef FFXM_HLSL
+ AccumulateOutputs results = (AccumulateOutputs)0; // zero-initialized on the HLSL path only
+#else
+ AccumulateOutputs results;
+#endif
+
+ // Load upsampled input color
+#if FFXM_HALF
+#ifdef FFXM_HLSL
+ RectificationBoxMin16 clippingBox = (RectificationBoxMin16)0;
+#else
+ RectificationBoxMin16 clippingBox;
+#endif
+#else
+#ifdef FFXM_HLSL
+ RectificationBox clippingBox = (RectificationBox)0;
+#else
+ RectificationBox clippingBox;
+#endif
+#endif
+
+ initIsNewSample(params); // called before the uses of params.bIsNewSample further down in this function
+
+ FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); // xyz = color, w = weight; clippingBox is presumably filled here as an output - confirm
+
+ FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w, results); // stores the final lock status in results.fLockStatus
+
+#if FFXM_SHADER_QUALITY_OPT_DISABLE_LUMA_INSTABILITY
+ const FfxFloat32 fLumaInstabilityFactor = 0.0f; // results.fLumaHistory is not written on this path
+#else
+ const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff, results);
+#endif
+
+ FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
+
+ if (params.bIsNewSample) { // new sample: the history is replaced by the current upsampled color
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+ fHistoryColor = InverseTonemap(fUpsampledColorAndWeight.xyz);
+#else
+ fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
+#endif
+ }
+ else {
+ RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); // fThisFrameReactiveFactor fills the fTemporalReactiveFactor parameter
+
+ Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
+ }
+
+ fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
+
+ // Get new temporal reactive factor
+ fTemporalReactiveFactor = FFXM_MIN16_F(ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor));
+
+#if !FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+ results.fColorAndWeight = FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor); // color in xyz, temporal reactive factor in w
+#else
+ // Output the upscaled color and the temporal reactive factor if these are contained in separate textures
+ results.fUpscaledColor = fHistoryColor;
+ results.fTemporalReactive = fTemporalReactiveFactor;
+#endif
+ // Output final color when RCAS is disabled
+#if FFXM_FSR2_OPTION_APPLY_SHARPENING == 0
+ results.fColor = fHistoryColor;
+#endif
+
+ return results;
+}
+
+#endif // FFXM_FSR2_ACCUMULATE_H
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta
new file mode 100644
index 0000000..77620fd
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_accumulate.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 508ebc327e5948447894b9bb6f08f843
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h
new file mode 100644
index 0000000..01f1a59
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h
@@ -0,0 +1,1040 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include "./ffxm_fsr2_resources.h"
+
+#if defined(FFXM_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "../ffxm_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFXM_GPU)
+
+#if defined(FFXM_GPU)
+#ifndef FFXM_PREFER_WAVE64
+#define FFXM_PREFER_WAVE64
+#endif // FFXM_PREFER_WAVE64
+
+#if defined(FFXM_GPU)
+//#pragma warning(disable: 3205) // conversion from larger type to smaller
+#endif // #if defined(FFXM_GPU)
+
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFXM_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
+#define FFXM_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFXM_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+#define SET_0_CB_START 2
+
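+// Workaround: on GLES 3.2 the qualifier used for RG UAVs is widened to four components (FfxFloat32x4 instead of FfxFloat32x2)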
+#if FFXM_SHADER_PLATFORM_GLES_3_2
+#define FFXM_UAV_RG_QUALIFIER FfxFloat32x4
+#else
+#define FFXM_UAV_RG_QUALIFIER FfxFloat32x2
+#endif
+
+#if defined(FSR2_BIND_CB_FSR2)
+ cbuffer cbFSR2 : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) // Main FSR2 constants: 32 32-bit values in total, each exposed through an accessor function except fPadding.
+ {
+ FfxInt32x2 iRenderSize; // RenderSize()
+ FfxInt32x2 iMaxRenderSize; // MaxRenderSize()
+ FfxInt32x2 iDisplaySize; // DisplaySize()
+ FfxInt32x2 iInputColorResourceDimensions; // InputColorResourceDimensions()
+ FfxInt32x2 iLumaMipDimensions; // LumaMipDimensions()
+ FfxInt32 iLumaMipLevelToUse; // LumaMipLevelToUse()
+ FfxInt32 iFrameIndex; // FrameIndex(); the accumulate pass treats 0 as a reset frame
+
+ FfxFloat32x4 fDeviceToViewDepth; // DeviceToViewSpaceTransformFactors()
+ FfxFloat32x2 fJitter; // Jitter()
+ FfxFloat32x2 fMotionVectorScale; // MotionVectorScale()
+ FfxFloat32x2 fDownscaleFactor; // DownscaleFactor()
+ FfxFloat32x2 fMotionVectorJitterCancellation; // MotionVectorJitterCancellation()
+ FfxFloat32 fPreExposure; // PreExposure()
+ FfxFloat32 fPreviousFramePreExposure; // PreviousFramePreExposure()
+ FfxFloat32 fTanHalfFOV; // TanHalfFoV()
+ FfxFloat32 fJitterSequenceLength; // JitterSequenceLength()
+ FfxFloat32 fDeltaTime; // DeltaTime()
+ FfxFloat32 fDynamicResChangeFactor; // DynamicResChangeFactor()
+ FfxFloat32 fViewSpaceToMetersFactor; // ViewSpaceToMetersFactor()
+
+ FfxFloat32 fPadding; // no accessor reads this; brings the member count to 32 32-bit values (a multiple of four)
+ };
+
+#define FFXM_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size.
+
+/* Define getter functions in the order they are defined in the CB! */
+FfxInt32x2 RenderSize() // Accessor for cbFSR2.iRenderSize.
+{
+ return iRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize() // Accessor for cbFSR2.iMaxRenderSize.
+{
+ return iMaxRenderSize;
+}
+
+FfxInt32x2 DisplaySize() // Accessor for cbFSR2.iDisplaySize.
+{
+ return iDisplaySize;
+}
+
+FfxInt32x2 InputColorResourceDimensions() // Accessor for cbFSR2.iInputColorResourceDimensions.
+{
+ return iInputColorResourceDimensions;
+}
+
+FfxInt32x2 LumaMipDimensions() // Accessor for cbFSR2.iLumaMipDimensions.
+{
+ return iLumaMipDimensions;
+}
+
+FfxInt32 LumaMipLevelToUse() // Accessor for cbFSR2.iLumaMipLevelToUse.
+{
+ return iLumaMipLevelToUse;
+}
+
+FfxInt32 FrameIndex() // Accessor for cbFSR2.iFrameIndex.
+{
+ return iFrameIndex;
+}
+
+FfxFloat32x2 Jitter() // Accessor for cbFSR2.fJitter; note the cbuffer declares fDeviceToViewDepth before fJitter, so this pair is swapped relative to member order.
+{
+ return fJitter;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors() // Accessor for cbFSR2.fDeviceToViewDepth.
+{
+ return fDeviceToViewDepth;
+}
+
+FfxFloat32x2 MotionVectorScale() // Accessor for cbFSR2.fMotionVectorScale.
+{
+ return fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor() // Accessor for cbFSR2.fDownscaleFactor.
+{
+ return fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation() // Accessor for cbFSR2.fMotionVectorJitterCancellation.
+{
+ return fMotionVectorJitterCancellation;
+}
+
+FfxFloat32 PreExposure() // Accessor for cbFSR2.fPreExposure.
+{
+ return fPreExposure;
+}
+
+FfxFloat32 PreviousFramePreExposure() // Accessor for cbFSR2.fPreviousFramePreExposure.
+{
+ return fPreviousFramePreExposure;
+}
+
+FfxFloat32 TanHalfFoV() // Accessor for cbFSR2.fTanHalfFOV.
+{
+ return fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength() // Accessor for cbFSR2.fJitterSequenceLength.
+{
+ return fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime() // Accessor for cbFSR2.fDeltaTime.
+{
+ return fDeltaTime;
+}
+
+FfxFloat32 DynamicResChangeFactor() // Accessor for cbFSR2.fDynamicResChangeFactor.
+{
+ return fDynamicResChangeFactor;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor() // Accessor for cbFSR2.fViewSpaceToMetersFactor.
+{
+ return fViewSpaceToMetersFactor;
+}
+#endif // #if defined(FSR2_BIND_CB_FSR2)
+
+#define FFXM_FSR2_ROOTSIG_STRINGIFY(p) FFXM_FSR2_ROOTSIG_STR(p)
+#define FFXM_FSR2_ROOTSIG_STR(p) #p
+#define FFXM_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFXM_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
+
+#define FFXM_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "DescriptorTable(SRV(t0, numDescriptors = " FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+ "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+ "RootConstants(num32BitConstants=" FFXM_FSR2_ROOTSIG_STRINGIFY(FFXM_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
+ "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+ "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+ "addressU = TEXTURE_ADDRESS_CLAMP, " \
+ "addressV = TEXTURE_ADDRESS_CLAMP, " \
+ "addressW = TEXTURE_ADDRESS_CLAMP, " \
+ "comparisonFunc = COMPARISON_NEVER, " \
+ "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+#if defined(FFXM_FSR2_EMBED_ROOTSIG)
+#define FFXM_FSR2_EMBED_ROOTSIG_CONTENT FFXM_FSR2_ROOTSIG
+#define FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFXM_FSR2_CB2_ROOTSIG
+#else
+#define FFXM_FSR2_EMBED_ROOTSIG_CONTENT
+#define FFXM_FSR2_EMBED_CB2_ROOTSIG_CONTENT
+#endif // #if FFXM_FSR2_EMBED_ROOTSIG
+
+#if defined(FSR2_BIND_CB_RCAS)
+cbuffer cbRCAS : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) // RCAS constants: a single four-component unsigned vector (4 32-bit values).
+{
+ FfxUInt32x4 rcasConfig;
+};
+
+FfxUInt32x4 RCASConfig() // Accessor for cbRCAS.rcasConfig.
+{
+ return rcasConfig;
+}
+#endif // #if defined(FSR2_BIND_CB_RCAS)
+
+
+#if defined(FSR2_BIND_CB_REACTIVE)
+cbuffer cbGenerateReactive : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) // Constants for reactive-mask generation: three floats and one flags word (4 32-bit values).
+{
+ FfxFloat32 gen_reactive_scale; // GenReactiveScale()
+ FfxFloat32 gen_reactive_threshold; // GenReactiveThreshold()
+ FfxFloat32 gen_reactive_binaryValue; // GenReactiveBinaryValue()
+ FfxUInt32 gen_reactive_flags; // GenReactiveFlags()
+};
+
+FfxFloat32 GenReactiveScale() // Accessor for cbGenerateReactive.gen_reactive_scale.
+{
+ return gen_reactive_scale;
+}
+
+FfxFloat32 GenReactiveThreshold() // Accessor for cbGenerateReactive.gen_reactive_threshold.
+{
+ return gen_reactive_threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue() // Accessor for cbGenerateReactive.gen_reactive_binaryValue.
+{
+ return gen_reactive_binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags() // Accessor for cbGenerateReactive.gen_reactive_flags.
+{
+ return gen_reactive_flags;
+}
+#endif // #if defined(FSR2_BIND_CB_REACTIVE)
+
+#if defined(FSR2_BIND_CB_SPD)
+cbuffer cbSPD : FFXM_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { // SPD constants: 6 32-bit values, the same count as FFXM_FSR2_CONSTANT_BUFFER_2_SIZE in this file.
+
+ FfxUInt32 mips; // MipCount()
+ FfxUInt32 numWorkGroups; // NumWorkGroups()
+ FfxUInt32x2 workGroupOffset; // WorkGroupOffset()
+ FfxUInt32x2 renderSize; // SPD_RenderSize(); a separate member from cbFSR2.iRenderSize
+};
+
+FfxUInt32 MipCount() // Accessor for cbSPD.mips.
+{
+ return mips;
+}
+
+FfxUInt32 NumWorkGroups() // Accessor for cbSPD.numWorkGroups.
+{
+ return numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset() // Accessor for cbSPD.workGroupOffset.
+{
+ return workGroupOffset;
+}
+
+FfxUInt32x2 SPD_RenderSize() // Accessor for cbSPD.renderSize.
+{
+ return renderSize;
+}
+#endif // #if defined(FSR2_BIND_CB_SPD)
+
+// Declare and sample camera buffers as regular textures, unless overridden
+#if !defined(UNITY_FSR_TEX2D)
+#define UNITY_FSR_TEX2D(type) Texture2D
+#endif
+#if !defined(UNITY_FSR_RWTEX2D)
+#define UNITY_FSR_RWTEX2D(type) RWTexture2D
+#endif
+#if !defined(UNITY_FSR_POS)
+#define UNITY_FSR_POS(pxPos) (pxPos)
+#endif
+#if !defined(UNITY_FSR_UV)
+#define UNITY_FSR_UV(uv) (uv)
+#endif
+
+SamplerState s_PointClamp : register(s0);
+SamplerState s_LinearClamp : register(s1);
+
+ // SRVs
+ #if defined FSR2_BIND_SRV_INPUT_COLOR
+ UNITY_FSR_TEX2D(FfxFloat32x4) r_input_color_jittered : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
+ #endif
+ #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
+ UNITY_FSR_TEX2D(FfxFloat32x4) r_input_opaque_only : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
+ #endif
+ #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS
+ UNITY_FSR_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
+ #endif
+ #if defined FSR2_BIND_SRV_INPUT_DEPTH
+ UNITY_FSR_TEX2D(FfxFloat32) r_input_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
+ #endif
+ #if defined FSR2_BIND_SRV_INPUT_EXPOSURE
+ Texture2D r_input_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
+ #endif
+ #if defined FSR2_BIND_SRV_AUTO_EXPOSURE
+ Texture2D r_auto_exposure : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
+ #endif
+ #if defined FSR2_BIND_SRV_REACTIVE_MASK
+ UNITY_FSR_TEX2D(FfxFloat32) r_reactive_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
+ #endif
+ #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
+ UNITY_FSR_TEX2D(FfxFloat32) r_transparency_and_composition_mask : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
+ #endif
+ // Internal pipeline textures declared as read-only Texture2D resources. Every declaration is
+ // guarded by its FSR2_BIND_SRV_* macro, and that same macro is forwarded to FFXM_FSR2_DECLARE_SRV.
+ #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+ // Texels are reinterpreted with asfloat() by the readers later in this file.
+ Texture2D r_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+ #endif
+ #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS
+ Texture2D r_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
+ #endif
+ #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
+ Texture2D r_previous_dilated_motion_vectors : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
+ #endif
+ #if defined FSR2_BIND_SRV_DILATED_DEPTH
+ Texture2D r_dilatedDepth : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
+ #endif
+ #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED
+ Texture2D r_internal_upscaled_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
+ #endif
+ #if defined FSR2_BIND_SRV_LOCK_STATUS
+ Texture2D r_lock_status : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
+ #endif
+ #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA
+ Texture2D r_lock_input_luma : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
+ #endif
+ #if defined FSR2_BIND_SRV_NEW_LOCKS
+ Texture2D r_new_locks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
+ #endif
+ #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
+ // The .xyz channels are read as color and the .w channel by SampleDepthClip (see the accessors below).
+ Texture2D r_prepared_input_color : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
+ #endif
+ #if defined FSR2_BIND_SRV_LUMA_HISTORY
+ Texture2D r_luma_history : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
+ #endif
+ #if defined FSR2_BIND_SRV_RCAS_INPUT
+ Texture2D r_rcas_input : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
+ #endif
+ #if defined FSR2_BIND_SRV_LANCZOS_LUT
+ // Lookup texture sampled by SampleLanczos2Weight.
+ Texture2D r_lanczos_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
+ #endif
+ #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
+ // Addressed per mip level by LoadMipLuma (.mips[][]) and SampleMipLuma (SampleLevel).
+ Texture2D r_imgMips : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
+ #endif
+ #if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
+ // Lookup texture sampled by SampleUpsampleMaximumBias.
+ Texture2D r_upsample_maximum_bias_lut : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
+ #endif
+ #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
+ Texture2D r_dilated_reactive_masks : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
+ #endif
+
+ #if defined FSR2_BIND_SRV_TEMPORAL_REACTIVE
+ Texture2D r_internal_temporal_reactive : FFXM_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TEMPORAL_REACTIVE);
+ #endif
+
+ // UAV declarations
+ // Writable (RWTexture2D) resources. Each one exists only when its FSR2_BIND_UAV_* macro is
+ // defined; that macro is also the argument given to FFXM_FSR2_DECLARE_UAV.
+ #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+ // Target of the InterlockedMin/InterlockedMax in StoreReconstructedDepth.
+ #if defined(SHADER_API_PSSL) && defined(SHADER_STAGE_FRAGMENT)
+ RWTexture2D rw_reconstructed_previous_nearest_depth; // Need to use implicit register binding for random write targets on PS4/5
+ #else
+ RWTexture2D rw_reconstructed_previous_nearest_depth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+ #endif
+ #endif
+ #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
+ RWTexture2D rw_dilated_motion_vectors : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
+ #endif
+ #if defined FSR2_BIND_UAV_DILATED_DEPTH
+ RWTexture2D rw_dilatedDepth : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
+ #endif
+ #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
+ RWTexture2D rw_internal_upscaled_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
+ #endif
+ #if defined FSR2_BIND_UAV_LOCK_STATUS
+ RWTexture2D rw_lock_status : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
+ #endif
+ #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA
+ RWTexture2D rw_lock_input_luma : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
+ #endif
+ #if defined FSR2_BIND_UAV_NEW_LOCKS
+ // Same PS4/5 fragment-stage special case as the reconstructed depth target above.
+ #if defined(SHADER_API_PSSL) && defined(SHADER_STAGE_FRAGMENT)
+ RWTexture2D rw_new_locks; // Need to use implicit register binding for random write targets on PS4/5
+ #else
+ RWTexture2D rw_new_locks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
+ #endif
+ #endif
+ #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
+ RWTexture2D rw_prepared_input_color : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
+ #endif
+ #if defined FSR2_BIND_UAV_LUMA_HISTORY
+ RWTexture2D rw_luma_history : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
+ #endif
+ #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
+ // Declared through the UNITY_FSR_RWTEX2D wrapper macro rather than a plain RWTexture2D.
+ UNITY_FSR_RWTEX2D(FfxFloat32x4) rw_upscaled_output : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
+ #endif
+ // The SPD targets below are marked globallycoherent; they are read and written by the
+ // SPD_* helpers at the end of this file.
+ #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ globallycoherent RWTexture2D rw_img_mip_shading_change : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
+ #endif
+ #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ globallycoherent RWTexture2D rw_img_mip_5 : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
+ #endif
+ #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
+ RWTexture2D rw_dilated_reactive_masks : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
+ #endif
+ #if defined FSR2_BIND_UAV_EXPOSURE
+ RWTexture2D rw_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
+ #endif
+ #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ RWTexture2D rw_auto_exposure : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
+ #endif
+ #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ // Texel (0, 0) is the counter used by SPD_IncreaseAtomicCounter / SPD_ResetAtomicCounter.
+ globallycoherent RWTexture2D rw_spd_global_atomic : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
+ #endif
+
+ #if defined FSR2_BIND_UAV_AUTOREACTIVE
+ RWTexture2D rw_output_autoreactive : FFXM_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
+ #endif
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
+// Reads a single texel of r_imgMips: mip `mipLevel`, integer texel position `iPxPos`.
+FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
+{
+ return r_imgMips.mips[mipLevel][iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
+// Samples mip `mipLevel` of r_imgMips at `fUV` through the s_LinearClamp sampler.
+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
+{
+ return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
+// Reads the input depth texel at integer position iPxPos (remapped through UNITY_FSR_POS).
+FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
+{
+    return r_input_depth[UNITY_FSR_POS(iPxPos)];
+}
+
+/*
+    Fetches a 2x2 quad of input depth with one GatherRed through s_PointClamp.
+    Output layout, as texel offsets from dd11:
+
+        dd00 (-1,-1) *------* dd10 (0,-1)
+                     |      |
+                     |      |
+        dd01 (-1, 0) *------* dd11 (0, 0)
+
+    Gather components map x -> dd01, y -> dd11, z -> dd10, w -> dd00.
+*/
+void GatherInputDepthRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd00,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd10,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd01,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd11)
+{
+    const FfxFloat32x4 fDepthQuad = r_input_depth.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV));
+    dd00 = fDepthQuad.w;
+    dd10 = fDepthQuad.z;
+    dd01 = fDepthQuad.x;
+    dd11 = fDepthQuad.y;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
+// Samples mip 0 of the input depth at fUV through s_LinearClamp and returns the first channel.
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
+{
+    return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
+// Reads the reactive mask texel at integer position iPxPos (remapped through UNITY_FSR_POS).
+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
+{
+    return r_reactive_mask[UNITY_FSR_POS(iPxPos)];
+}
+
+/*
+    Fetches a 2x2 quad of the reactive mask with one GatherRed through s_PointClamp.
+    Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherReactiveRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    const FFXM_MIN16_F4 fMaskQuad = r_reactive_mask.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV));
+    col00 = fMaskQuad.w;
+    col10 = fMaskQuad.z;
+    col01 = fMaskQuad.x;
+    col11 = fMaskQuad.y;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+// Reads the transparency-and-composition mask texel at iPxPos (remapped through UNITY_FSR_POS).
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
+{
+    return r_transparency_and_composition_mask[UNITY_FSR_POS(iPxPos)];
+}
+
+/*
+    Fetches a 2x2 quad of the transparency-and-composition mask with one GatherRed
+    through s_PointClamp. Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherTransparencyAndCompositionMaskRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    const FFXM_MIN16_F4 fMaskQuad = r_transparency_and_composition_mask.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV));
+    col00 = fMaskQuad.w;
+    col10 = fMaskQuad.z;
+    col01 = fMaskQuad.x;
+    col11 = fMaskQuad.y;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+// Reads the RGB of the jittered input color texel at iPxPos (remapped through UNITY_FSR_POS).
+FFXM_MIN16_F3 LoadInputColor(FfxUInt32x2 iPxPos)
+{
+    return r_input_color_jittered[UNITY_FSR_POS(iPxPos)].rgb;
+}
+
+/*
+    Fetches a 2x2 quad of input color using one gather per channel (red, green, blue)
+    through s_PointClamp. Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherInputColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col11)
+{
+    const FFXM_MIN16_F4 fRedQuad   = r_input_color_jittered.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV));
+    const FFXM_MIN16_F4 fGreenQuad = r_input_color_jittered.GatherGreen(s_PointClamp, UNITY_FSR_UV(fUV));
+    const FFXM_MIN16_F4 fBlueQuad  = r_input_color_jittered.GatherBlue(s_PointClamp, UNITY_FSR_UV(fUV));
+
+    col00 = FFXM_MIN16_F3(fRedQuad.w, fGreenQuad.w, fBlueQuad.w);
+    col10 = FFXM_MIN16_F3(fRedQuad.z, fGreenQuad.z, fBlueQuad.z);
+    col01 = FFXM_MIN16_F3(fRedQuad.x, fGreenQuad.x, fBlueQuad.x);
+    col11 = FFXM_MIN16_F3(fRedQuad.y, fGreenQuad.y, fBlueQuad.y);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+// Samples mip 0 of the jittered input color at fUV through s_LinearClamp and returns RGB.
+FFXM_MIN16_F3 SampleInputColor(FfxFloat32x2 fUV)
+{
+    return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).rgb;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+// Reads the .xyz of the prepared input color texel at integer position iPxPos.
+FFXM_MIN16_F3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
+{
+    return r_prepared_input_color[iPxPos].xyz;
+}
+
+// Samples mip 0 of the prepared input color at fUV through s_PointClamp and returns .xyz.
+FFXM_MIN16_F3 SamplePreparedInputColor(FfxFloat32x2 fUV)
+{
+    return r_prepared_input_color.SampleLevel(s_PointClamp, fUV, 0).xyz;
+}
+
+/*
+    Fetches a 2x2 quad of prepared input color using one gather per channel
+    (red, green, blue) through s_PointClamp. Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherPreparedInputColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F3 col11)
+{
+    const FFXM_MIN16_F4 fRedQuad   = r_prepared_input_color.GatherRed(s_PointClamp, fUV);
+    const FFXM_MIN16_F4 fGreenQuad = r_prepared_input_color.GatherGreen(s_PointClamp, fUV);
+    const FFXM_MIN16_F4 fBlueQuad  = r_prepared_input_color.GatherBlue(s_PointClamp, fUV);
+
+    col00 = FFXM_MIN16_F3(fRedQuad.w, fGreenQuad.w, fBlueQuad.w);
+    col10 = FFXM_MIN16_F3(fRedQuad.z, fGreenQuad.z, fBlueQuad.z);
+    col01 = FFXM_MIN16_F3(fRedQuad.x, fGreenQuad.x, fBlueQuad.x);
+    col11 = FFXM_MIN16_F3(fRedQuad.y, fGreenQuad.y, fBlueQuad.y);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
+// Reads the .xy motion vector at the given texel (remapped through UNITY_FSR_POS) and
+// multiplies it by MotionVectorScale(). When FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS is
+// enabled, MotionVectorJitterCancellation() is subtracted from the scaled vector.
+FFXM_MIN16_F2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
+{
+    const FFXM_MIN16_F2 fRawMotion = r_input_motion_vectors[UNITY_FSR_POS(iPxDilatedMotionVectorPos)].xy;
+
+    FFXM_MIN16_F2 fScaledMotion = fRawMotion * MotionVectorScale();
+
+#if FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+    fScaledMotion -= MotionVectorJitterCancellation();
+#endif
+
+    return fScaledMotion;
+}
+
+/*
+    Fetches a 2x2 quad of motion vectors using a red and a green gather through
+    s_PointClamp, applying the same scale (and optional jitter cancellation) as
+    LoadInputMotionVector. Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherInputMotionVectorRGQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F2 col11)
+{
+    const FFXM_MIN16_F4 fMotionX = r_input_motion_vectors.GatherRed(s_PointClamp, UNITY_FSR_UV(fUV));
+    const FFXM_MIN16_F4 fMotionY = r_input_motion_vectors.GatherGreen(s_PointClamp, UNITY_FSR_UV(fUV));
+
+    col00 = FFXM_MIN16_F2(fMotionX.w, fMotionY.w) * MotionVectorScale();
+    col10 = FFXM_MIN16_F2(fMotionX.z, fMotionY.z) * MotionVectorScale();
+    col01 = FFXM_MIN16_F2(fMotionX.x, fMotionY.x) * MotionVectorScale();
+    col11 = FFXM_MIN16_F2(fMotionX.y, fMotionY.y) * MotionVectorScale();
+
+#if FFXM_FSR2_OPTION_JITTERED_MOTION_VECTORS
+    col00 -= MotionVectorJitterCancellation();
+    col10 -= MotionVectorJitterCancellation();
+    col01 -= MotionVectorJitterCancellation();
+    col11 -= MotionVectorJitterCancellation();
+#endif
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
+// Reads the internal upscaled color (history) texel at integer position iPxHistory.
+FFXM_MIN16_F4 LoadHistory(FfxUInt32x2 iPxHistory)
+{
+    return r_internal_upscaled_color[iPxHistory];
+}
+
+// Samples mip 0 of the internal upscaled color at fUV through s_LinearClamp.
+FFXM_MIN16_F4 SampleUpscaledHistory(FfxFloat32x2 fUV)
+{
+    return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0);
+}
+
+/*
+    Fetches a 2x2 quad of history color using one gather per channel (red, green, blue)
+    through s_PointClamp. Only RGB is gathered; the fourth component of every output is
+    written as 0. Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherHistoryColorRGBQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F4 col11)
+{
+    const FFXM_MIN16_F4 fRedQuad   = r_internal_upscaled_color.GatherRed(s_PointClamp, fUV);
+    const FFXM_MIN16_F4 fGreenQuad = r_internal_upscaled_color.GatherGreen(s_PointClamp, fUV);
+    const FFXM_MIN16_F4 fBlueQuad  = r_internal_upscaled_color.GatherBlue(s_PointClamp, fUV);
+
+    col00 = FFXM_MIN16_F4(fRedQuad.w, fGreenQuad.w, fBlueQuad.w, 0.0f);
+    col10 = FFXM_MIN16_F4(fRedQuad.z, fGreenQuad.z, fBlueQuad.z, 0.0f);
+    col01 = FFXM_MIN16_F4(fRedQuad.x, fGreenQuad.x, fBlueQuad.x, 0.0f);
+    col11 = FFXM_MIN16_F4(fRedQuad.y, fGreenQuad.y, fBlueQuad.y, 0.0f);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
+// Writes fLumaHistory to the luma-history UAV at texel iPxPos.
+void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
+{
+ rw_luma_history[iPxPos] = fLumaHistory;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
+// Samples mip 0 of the luma-history SRV at fUV through s_LinearClamp.
+FFXM_MIN16_F4 SampleLumaHistory(FfxFloat32x2 fUV)
+{
+ return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif
+
+// Loads the RCAS input texel at iPxPos (mip 0). Always defined: when the RCAS input SRV is
+// not bound the function returns zero instead of being compiled out.
+FFXM_MIN16_F4 LoadRCAS_Input(FfxInt32x2 iPxPos)
+{
+#if defined(FSR2_BIND_SRV_RCAS_INPUT)
+ return r_rcas_input.Load(FfxInt32x3(iPxPos, 0));
+#else
+ return 0.0;
+#endif
+}
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
+// Writes fHistory to the internal upscaled color UAV at texel iPxHistory.
+void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+ rw_internal_upscaled_color[iPxHistory] = fHistory;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
+// Writes fColorAndWeight to the same internal upscaled color UAV at texel iPxPos; the store
+// is identical to StoreReprojectedHistory, only the name and parameter naming differ.
+void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+ rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
+// Writes fColor to the upscaled output UAV at iPxPos (remapped through UNITY_FSR_POS);
+// the fourth component is always written as 1.
+void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+{
+ rw_upscaled_output[UNITY_FSR_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f);
+}
+#endif
+
+//LOCK_LIFETIME_REMAINING == 0
+//Should make LockInitialLifetime() return a const 1.0f later
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+// Reads the two-component lock status at texel iPxPos from the lock-status SRV.
+FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
+{
+ return r_lock_status[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LOCK_STATUS)
+// Writes the two-component lock status to the lock-status UAV at texel iPxPos.
+void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
+{
+ rw_lock_status[iPxPos] = fLockStatus;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
+// Reads the lock input luma texel at integer position iPxPos.
+FFXM_MIN16_F LoadLockInputLuma(FfxUInt32x2 iPxPos)
+{
+    return r_lock_input_luma[iPxPos];
+}
+
+/*
+    Fetches a 2x2 quad of lock input luma with one GatherRed through s_PointClamp.
+    Output layout, as texel offsets from col11:
+
+        col00 (-1,-1) *------* col10 (0,-1)
+                      |      |
+                      |      |
+        col01 (-1, 0) *------* col11 (0, 0)
+
+    Gather components map x -> col01, y -> col11, z -> col10, w -> col00.
+*/
+void GatherLockInputLumaRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col00,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col10,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col01,
+    FFXM_PARAMETER_INOUT FFXM_MIN16_F col11)
+{
+    const FFXM_MIN16_F4 fLumaQuad = r_lock_input_luma.GatherRed(s_PointClamp, fUV);
+    col00 = fLumaQuad.w;
+    col10 = fLumaQuad.z;
+    col01 = fLumaQuad.x;
+    col11 = fLumaQuad.y;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
+// Reads the new-locks value at texel iPxPos from the read-only (SRV) binding.
+FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
+{
+ return r_new_locks[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+// Reads the new-locks value at texel iPxPos from the writable (UAV) binding.
+// Note the return type is FFXM_MIN16_F here, whereas LoadNewLocks returns FfxFloat32.
+FFXM_MIN16_F LoadRwNewLocks(FfxUInt32x2 iPxPos)
+{
+ return rw_new_locks[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+// Writes newLock to the new-locks UAV at texel iPxPos.
+void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
+{
+ rw_new_locks[iPxPos] = newLock;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+// Returns the .w channel of the prepared input color, sampled at fUV (mip 0) through
+// s_LinearClamp. The function name indicates that this channel carries the depth-clip factor.
+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
+{
+ return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+// Samples the two-component lock status at fUV (mip 0) through s_LinearClamp.
+FFXM_MIN16_F2 SampleLockStatus(FfxFloat32x2 fUV)
+{
+ FFXM_MIN16_F2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
+ return fLockStatus;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+// Reads the reconstructed previous depth at texel iPxPos. The stored value is
+// reinterpreted bit-for-bit as a float via asfloat().
+FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
+{
+    return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]);
+}
+
+/*
+    Fetches a 2x2 quad of reconstructed previous depth with one GatherRed through
+    s_PointClamp; each gathered uint is reinterpreted as a float via asfloat().
+    Output layout, as texel offsets from d11:
+
+        d00 (-1,-1) *------* d10 (0,-1)
+                    |      |
+                    |      |
+        d01 (-1, 0) *------* d11 (0, 0)
+
+    Gather components map x -> d01, y -> d11, z -> d10, w -> d00.
+*/
+void GatherReconstructedPreviousDepthRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FfxFloat32 d00,
+    FFXM_PARAMETER_INOUT FfxFloat32 d10,
+    FFXM_PARAMETER_INOUT FfxFloat32 d01,
+    FFXM_PARAMETER_INOUT FfxFloat32 d11)
+{
+    const FfxUInt32x4 uDepthBits = r_reconstructed_previous_nearest_depth.GatherRed(s_PointClamp, fUV);
+    d00 = asfloat(uDepthBits.w);
+    d10 = asfloat(uDepthBits.z);
+    d01 = asfloat(uDepthBits.x);
+    d11 = asfloat(uDepthBits.y);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+// Atomically merges fDepth into the reconstructed-depth UAV at texel iPxSample.
+// The float is reinterpreted as a uint (asuint) so an integer atomic can be used:
+// InterlockedMax when FFXM_FSR2_OPTION_INVERTED_DEPTH is enabled, InterlockedMin otherwise.
+// NOTE(review): comparing float bit patterns as uints presumably relies on depth values
+// being non-negative - confirm against the callers.
+void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
+{
+ FfxUInt32 uDepth = asuint(fDepth);
+
+ #if FFXM_FSR2_OPTION_INVERTED_DEPTH
+ InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth);
+ #else
+ InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth
+ #endif
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+// Overwrites the reconstructed-depth texel at iPxSample with the raw uint uValue
+// (plain store, no atomic and no float conversion).
+void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
+{
+ rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
+// Reads the .xy of the dilated motion vector texel at iPxInput.
+FFXM_MIN16_F2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
+{
+ return r_dilated_motion_vectors[iPxInput].xy;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
+// Reads the .xy of the previous dilated motion vector texel at iPxInput.
+FFXM_MIN16_F2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
+{
+ return r_previous_dilated_motion_vectors[iPxInput].xy;
+}
+
+// Samples the previous dilated motion vectors at uv (mip 0) through s_LinearClamp and returns .xy.
+FFXM_MIN16_F2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
+{
+ return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
+// Reads the dilated depth texel at integer position iPxInput.
+FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
+{
+    return r_dilatedDepth[iPxInput];
+}
+
+/*
+    Fetches a 2x2 quad of dilated depth with one GatherRed through s_PointClamp.
+    Output layout, as texel offsets from dd11:
+
+        dd00 (-1,-1) *------* dd10 (0,-1)
+                     |      |
+                     |      |
+        dd01 (-1, 0) *------* dd11 (0, 0)
+
+    Gather components map x -> dd01, y -> dd11, z -> dd10, w -> dd00.
+*/
+void GatherDilatedDepthRQuad(FfxFloat32x2 fUV,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd00,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd10,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd01,
+    FFXM_PARAMETER_INOUT FfxFloat32 dd11)
+{
+    const FfxFloat32x4 fDepthQuad = r_dilatedDepth.GatherRed(s_PointClamp, fUV);
+    dd00 = fDepthQuad.w;
+    dd10 = fDepthQuad.z;
+    dd01 = fDepthQuad.x;
+    dd11 = fDepthQuad.y;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
+// Returns the first channel of texel (0, 0) of the input exposure texture.
+// A stored value of exactly 0 is replaced by 1.
+FfxFloat32 Exposure()
+{
+    const FfxFloat32 fStoredExposure = r_input_exposure[FfxUInt32x2(0, 0)].x;
+    return (fStoredExposure == 0.0f) ? 1.0f : fStoredExposure;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
+// Returns the first channel of texel (0, 0) of the auto-exposure texture.
+// A stored value of exactly 0 is replaced by 1.
+FfxFloat32 AutoExposure()
+{
+    const FfxFloat32 fStoredExposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;
+    return (fStoredExposure == 0.0f) ? 1.0f : fStoredExposure;
+}
+#endif
+
+// Looks up a Lanczos weight from the LUT texture: u = x / 2, v fixed at 0.5, mip 0,
+// through s_LinearClamp. Always defined; returns 0 when the LUT SRV is not bound.
+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x)
+{
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
+ return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0);
+#else
+ return 0.f;
+#endif
+}
+
+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
+// Samples the maximum-bias LUT at abs(uv) * 2 (mip 0, s_LinearClamp) and doubles the result.
+// Taking abs(uv) means the lookup is mirrored around uv = 0 on both axes.
+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
+{
+ // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
+ return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_TEMPORAL_REACTIVE)
+// Samples the internal temporal reactive texture at fUV (mip 0) through s_LinearClamp.
+FfxFloat32 SampleTemporalReactive(FfxFloat32x2 fUV)
+{
+ return r_internal_temporal_reactive.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
+// Samples the two-component dilated reactive masks at fUV (mip 0) through s_LinearClamp.
+FFXM_MIN16_F2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
+{
+ return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
+// Reads the two-component dilated reactive masks texel at integer position iPxPos.
+FFXM_MIN16_F2 LoadDilatedReactiveMasks(FFXM_PARAMETER_IN FfxUInt32x2 iPxPos)
+{
+ return r_dilated_reactive_masks[iPxPos];
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
+// Reads the .xyz of the opaque-only input texel at iPxPos (remapped through UNITY_FSR_POS).
+// Unlike the other loaders in this file, the position parameter is typed FFXM_MIN16_I2.
+FfxFloat32x3 LoadOpaqueOnly(FFXM_PARAMETER_IN FFXM_MIN16_I2 iPxPos)
+{
+ return r_input_opaque_only[UNITY_FSR_POS(iPxPos)].xyz;
+}
+#endif
+
+// Returns the .rg pair stored at texel (0, 0) of the auto-exposure texture.
+// The read-only (SRV) binding is preferred; the writable (UAV) binding is used only when
+// the SRV is not bound; with neither bound the result is (0, 0).
+FfxFloat32x2 SPD_LoadExposureBuffer()
+{
+#if defined FSR2_BIND_SRV_AUTO_EXPOSURE
+ return r_auto_exposure[FfxInt32x2(0, 0)].rg;
+#elif defined FSR2_BIND_UAV_AUTO_EXPOSURE
+ return rw_auto_exposure[FfxInt32x2(0, 0)].rg;
+#else
+ return FfxFloat32x2(0.f, 0.f);
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+// Writes the two-component exposure value to texel (0, 0) of the auto-exposure UAV.
+// Compiles to an empty function when FSR2_BIND_UAV_AUTO_EXPOSURE is not defined.
+void SPD_SetExposureBuffer(FfxFloat32x2 value)
+{
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+#if FFXM_SHADER_PLATFORM_GLES_3_2
+    // The GLES 3.2 path writes a four-component texel, padding with zeros. The texel must be
+    // built as a float vector: an integer vector constructor would truncate the fractional
+    // part of the exposure values before they are stored.
+    rw_auto_exposure[FfxInt32x2(0, 0)] = FfxFloat32x4(value, 0.0f, 0.0f);
+#else
+    rw_auto_exposure[FfxInt32x2(0, 0)] = value;
+#endif
+#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+}
+
+// Returns the rw_img_mip_5 texel at iPxPos in the first component of a four-component
+// vector (remaining components 0). Returns all zeros when the mip-5 UAV is not bound.
+FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos)
+{
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0);
+#else
+ return FfxFloat32x4(0.f, 0.f, 0.f, 0.f);
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+}
+
+// Stores `value` at texel iPxPos of the UAV selected by `slice`:
+//   slice == FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL -> rw_img_mip_shading_change
+//   slice == 5                                  -> rw_img_mip_5
+// Any other slice does not store `value`. Each store is compiled in only when the
+// corresponding UAV binding macro is defined.
+void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value)
+{
+ switch (slice)
+ {
+ case FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ rw_img_mip_shading_change[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+ break;
+ case 5:
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ rw_img_mip_5[iPxPos] = value;
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ break;
+ default:
+
+ // avoid flattened side effect
+ // The default branch writes a texel's own value back to itself (shading-change target
+ // if bound, otherwise the mip-5 target), so the stored data is left unchanged.
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+ rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos];
+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
+ rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos];
+#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+ break;
+ }
+}
+
+// Atomically adds 1 to texel (0, 0) of rw_spd_global_atomic. spdCounter is passed as the
+// third argument of InterlockedAdd, which receives the value held before the addition.
+// When the atomic UAV is not bound, spdCounter is left untouched.
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter)
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter);
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+// Resets the counter at texel (0, 0) of rw_spd_global_atomic to 0 with a plain store.
+// Does nothing when the atomic UAV is not bound.
+void SPD_ResetAtomicCounter()
+{
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+ rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0;
+#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+#endif // #if defined(FFXM_GPU)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta
new file mode 100644
index 0000000..fc47d4c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_callbacks_hlsl.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 2dc46407945236c43a0c460b616f4204
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h
new file mode 100644
index 0000000..662ba2a
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h
@@ -0,0 +1,595 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#if !defined(FFXM_FSR2_COMMON_H)
+#define FFXM_FSR2_COMMON_H
+
+#if defined(FFXM_CPU) || defined(FFXM_GPU)
+//Locks
+// Component indices into the two-component lock-status vector. KillLock in this file uses
+// LOCK_LIFETIME_REMAINING as the index it clears; LOCK_TEMPORAL_LUMA is presumably the
+// other component - confirm against its users.
+#define LOCK_LIFETIME_REMAINING 0
+#define LOCK_TEMPORAL_LUMA 1
+#endif // #if defined(FFXM_CPU) || defined(FFXM_GPU)
+
+#if defined(FFXM_GPU)
+// Numeric limits and tolerances shared by the FSR2 shader code.
+FFXM_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f;
+FFXM_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f;
+FFXM_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f;
+// Defined as the reciprocal of FSR2_FP16_MAX.
+FFXM_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX;
+FFXM_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f;
+FFXM_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f;
+
+// treat vector truncation warnings as errors
+#pragma warning(error: 3206)
+
+// suppress warnings
+#pragma warning(disable: 3205) // conversion from larger type to smaller
+#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative
+
+// Reconstructed depth usage
+FFXM_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
+
+// Accumulation
+// The weight scale is 1/9 by default and 1/5 when FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP
+// is enabled (presumably the reciprocal of the number of upsampling taps - confirm).
+#if !FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP
+FFXM_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 9.0f;
+#else
+FFXM_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 5.0f;
+#endif
+FFXM_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
+FFXM_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
+FFXM_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
+
+// Auto exposure
+FFXM_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e4f;
+
+// Optimizations defines
+// FFXM_OPT_USE_GATHER_OPS defaults to 0 unless it was already defined before this header.
+#ifndef FFXM_OPT_USE_GATHER_OPS
+#define FFXM_OPT_USE_GATHER_OPS 0
+#endif
+
+// Bundle of per-pixel values for the accumulation pass (per the struct name). The fields
+// are not written anywhere in this part of the file; the notes below are derived from the
+// field names and types only - confirm against the code that fills the struct.
+struct AccumulationPassCommonParams
+{
+ // Integer pixel position and matching UV (the "Hr" prefix presumably means high/output resolution).
+ FfxInt32x2 iPxHrPos;
+ FfxFloat32x2 fHrUv;
+ FfxFloat32x2 fLrUv_HwSampler;
+ FfxFloat32x2 fMotionVector;
+ FfxFloat32x2 fReprojectedHrUv;
+ FfxFloat32 fHrVelocity;
+ // Reduced-precision (FFXM_MIN16_F) factors.
+ FFXM_MIN16_F fDepthClipFactor;
+ FFXM_MIN16_F fDilatedReactiveFactor;
+ FFXM_MIN16_F fAccumulationMask;
+
+ //FfxBoolean bIsResetFrame;
+ FfxBoolean bIsExistingSample;
+ FfxBoolean bIsNewSample;
+};
+
+// Pair of flags describing the lock state of a pixel.
+struct LockState
+{
+ FfxBoolean NewLock; //Set for both unique new and re-locked new
+ FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
+};
+
+// Resets a lock-status pair for a new sample: both components become zero.
+void InitializeNewLockSample(FFXM_PARAMETER_OUT FfxFloat32x2 fLockStatus)
+{
+    const FfxFloat32x2 fClearedStatus = FfxFloat32x2(0, 0);
+    fLockStatus = fClearedStatus;
+}
+
+#if FFXM_HALF
+// Reduced-precision overload of InitializeNewLockSample.
+void InitializeNewLockSample(FFXM_PARAMETER_OUT FFXM_MIN16_F2 fLockStatus)
+{
+    const FFXM_MIN16_F2 fClearedStatus = FFXM_MIN16_F2(0, 0);
+    fLockStatus = fClearedStatus;
+}
+#endif
+
+
+// Ends a lock by zeroing its LOCK_LIFETIME_REMAINING component; the other component is
+// left as it was.
+void KillLock(FFXM_PARAMETER_INOUT FfxFloat32x2 fLockStatus)
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = FfxFloat32(0);
+}
+
+#if FFXM_HALF
+// Reduced-precision overload of KillLock.
+void KillLock(FFXM_PARAMETER_INOUT FFXM_MIN16_F2 fLockStatus)
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = FFXM_MIN16_F(0);
+}
+#endif
+
+// Running color statistics built up by the RectificationBox* functions in this file:
+//   boxCenter        - weighted sum of samples; becomes the weighted mean after
+//                      RectificationBoxComputeVarianceBoxData.
+//   boxVec           - weighted sum of squared samples; becomes the per-channel standard
+//                      deviation after RectificationBoxComputeVarianceBoxData.
+//   aabbMin/aabbMax  - per-channel minimum / maximum of the samples added so far.
+//   fBoxCenterWeight - sum of the sample weights.
+struct RectificationBox
+{
+ FfxFloat32x3 boxCenter;
+ FfxFloat32x3 boxVec;
+ FfxFloat32x3 aabbMin;
+ FfxFloat32x3 aabbMax;
+ FfxFloat32 fBoxCenterWeight;
+};
+#if FFXM_HALF
+// Reduced-precision (FFXM_MIN16_F) variant with the same fields as RectificationBox.
+struct RectificationBoxMin16
+{
+ FFXM_MIN16_F3 boxCenter;
+ FFXM_MIN16_F3 boxVec;
+ FFXM_MIN16_F3 aabbMin;
+ FFXM_MIN16_F3 aabbMax;
+ FFXM_MIN16_F fBoxCenterWeight;
+};
+#endif
+
+// Puts the box into its empty state: zero weight, zero sums, and an inverted bounding
+// box (min = +FSR2_FLT_MAX, max = -FSR2_FLT_MAX) so that any later min/max shrinks onto
+// the first sample.
+void RectificationBoxReset(FFXM_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
+    const FfxFloat32x3 fLargest = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
+
+    rectificationBox.fBoxCenterWeight = FfxFloat32(0);
+    rectificationBox.boxCenter = fZero;
+    rectificationBox.boxVec = fZero;
+    rectificationBox.aabbMin = fLargest;
+    rectificationBox.aabbMax = -fLargest;
+}
+#if FFXM_HALF
+// Reduced-precision overload; the bounding box limits use FSR2_FP16_MAX instead.
+void RectificationBoxReset(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    const FFXM_MIN16_F3 fZero = FFXM_MIN16_F3(0, 0, 0);
+    const FFXM_MIN16_F3 fLargest = FFXM_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
+
+    rectificationBox.fBoxCenterWeight = FFXM_MIN16_F(0);
+    rectificationBox.boxCenter = fZero;
+    rectificationBox.boxVec = fZero;
+    rectificationBox.aabbMin = fLargest;
+    rectificationBox.aabbMax = -fLargest;
+}
+#endif
+
+// Seeds the box with its first sample: the bounding box collapses onto the sample and
+// the weighted sums are assigned (not accumulated).
+void RectificationBoxAddInitialSample(FFXM_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeighted;
+    rectificationBox.boxVec = colorSample * fWeighted;
+    rectificationBox.aabbMin = colorSample;
+    rectificationBox.aabbMax = colorSample;
+}
+
+// Adds a weighted sample to the box. With bInitialSample set the box is seeded via
+// RectificationBoxAddInitialSample; otherwise the bounding box is grown and the weighted
+// sum, weighted sum of squares and total weight are accumulated.
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFXM_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    if (bInitialSample) {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+        return;
+    }
+
+    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.fBoxCenterWeight += fSampleWeight;
+    rectificationBox.boxCenter += fWeighted;
+    rectificationBox.boxVec += colorSample * fWeighted;
+    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+}
+#if FFXM_HALF
+// Reduced-precision overload of RectificationBoxAddInitialSample.
+void RectificationBoxAddInitialSample(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFXM_MIN16_F3 colorSample, const FFXM_MIN16_F fSampleWeight)
+{
+    const FFXM_MIN16_F3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeighted;
+    rectificationBox.boxVec = colorSample * fWeighted;
+    rectificationBox.aabbMin = colorSample;
+    rectificationBox.aabbMax = colorSample;
+}
+
+// Reduced-precision overload of RectificationBoxAddSample.
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFXM_MIN16_F3 colorSample, const FFXM_MIN16_F fSampleWeight)
+{
+    if (bInitialSample) {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+        return;
+    }
+
+    const FFXM_MIN16_F3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.fBoxCenterWeight += fSampleWeight;
+    rectificationBox.boxCenter += fWeighted;
+    rectificationBox.boxVec += colorSample * fWeighted;
+    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+}
+#endif
+
+// Finalizes the accumulated sums in place:
+//   - a total weight whose magnitude is not above FSR2_EPSILON is replaced by 1;
+//   - boxCenter and boxVec are divided by that weight (mean and mean of squares);
+//   - boxVec is then overwritten with sqrt(|mean of squares - mean^2|), the per-channel
+//     standard deviation.
+void RectificationBoxComputeVarianceBoxData(FFXM_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    FfxFloat32 fTotalWeight = rectificationBox.fBoxCenterWeight;
+    fTotalWeight = (abs(fTotalWeight) > FfxFloat32(FSR2_EPSILON)) ? fTotalWeight : FfxFloat32(1.f);
+    rectificationBox.fBoxCenterWeight = fTotalWeight;
+
+    rectificationBox.boxCenter /= fTotalWeight;
+    rectificationBox.boxVec /= fTotalWeight;
+
+    rectificationBox.boxVec = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
+}
+#if FFXM_HALF
+// Reduced-precision overload of RectificationBoxComputeVarianceBoxData.
+void RectificationBoxComputeVarianceBoxData(FFXM_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    FFXM_MIN16_F fTotalWeight = rectificationBox.fBoxCenterWeight;
+    fTotalWeight = (abs(fTotalWeight) > FFXM_MIN16_F(FSR2_EPSILON)) ? fTotalWeight : FFXM_MIN16_F(1.f);
+    rectificationBox.fBoxCenterWeight = fTotalWeight;
+
+    rectificationBox.boxCenter /= fTotalWeight;
+    rectificationBox.boxVec /= fTotalWeight;
+
+    rectificationBox.boxVec = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
+}
+#endif
+
+// Component-wise reciprocal of v, guarded as a whole: if ANY component of v equals zero
+// the result is the zero vector, otherwise it is 1 / v.
+FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
+{
+    const FfxFloat32x3 fZero = FfxFloat32x3(0, 0, 0);
+
+    if (all(FFXM_NOT_EQUAL(v, fZero))) {
+        return FfxFloat32x3(1, 1, 1) / v;
+    }
+    return fZero;
+}
+#if FFXM_HALF
+// Reduced-precision overload of SafeRcp3.
+FFXM_MIN16_F3 SafeRcp3(FFXM_MIN16_F3 v)
+{
+    const FFXM_MIN16_F3 fZero = FFXM_MIN16_F3(0, 0, 0);
+
+    if (all(FFXM_NOT_EQUAL(v, fZero))) {
+        return FFXM_MIN16_F3(1, 1, 1) / v;
+    }
+    return fZero;
+}
+#endif
+
+// Returns min(v0, v1) / max(v0, v1), or 0 when the larger value is exactly zero.
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
+{
+    const FfxFloat32 fLarger = ffxMax(v0, v1);
+    if (fLarger != 0) {
+        return ffxMin(v0, v1) / fLarger;
+    }
+    return 0;
+}
+
+#if FFXM_HALF
+// Reduced-precision overload of MinDividedByMax.
+FFXM_MIN16_F MinDividedByMax(const FFXM_MIN16_F v0, const FFXM_MIN16_F v1)
+{
+    const FFXM_MIN16_F fLarger = ffxMax(v0, v1);
+    if (fLarger != FFXM_MIN16_F(0)) {
+        return ffxMin(v0, v1) / fLarger;
+    }
+    return FFXM_MIN16_F(0);
+}
+#endif
+
+// Converts a YCoCg triple (x = Y, y = Co, z = Cg) to RGB:
+//   R = Y + Co - Cg,  G = Y + Cg,  B = Y - Co - Cg
+FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
+{
+    const FfxFloat32 fY = fYCoCg.x;
+    const FfxFloat32 fCo = fYCoCg.y;
+    const FfxFloat32 fCg = fYCoCg.z;
+
+    return FfxFloat32x3(
+        fY + fCo - fCg,
+        fY + fCg,
+        fY - fCo - fCg);
+}
+#if FFXM_HALF
+// Reduced-precision overload of YCoCgToRGB.
+FFXM_MIN16_F3 YCoCgToRGB(FFXM_MIN16_F3 fYCoCg)
+{
+    const FFXM_MIN16_F fY = fYCoCg.x;
+    const FFXM_MIN16_F fCo = fYCoCg.y;
+    const FFXM_MIN16_F fCg = fYCoCg.z;
+
+    return FFXM_MIN16_F3(
+        fY + fCo - fCg,
+        fY + fCg,
+        fY - fCo - fCg);
+}
+#endif
+
+// Converts RGB to (Y, Co, Cg):
+//   Y  =  R/4 + G/2 + B/4
+//   Co =  R/2       - B/2
+//   Cg = -R/4 + G/2 - B/4
+FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
+{
+    return FfxFloat32x3(
+        0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b,
+        0.5f * fRgb.r - 0.5f * fRgb.b,
+        -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b);
+}
+#if FFXM_HALF
+// Reduced-precision overload of RGBToYCoCg (same coefficients).
+FFXM_MIN16_F3 RGBToYCoCg(FFXM_MIN16_F3 fRgb)
+{
+    return FFXM_MIN16_F3(
+        0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,
+        0.5 * fRgb.r - 0.5 * fRgb.b,
+        -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b);
+}
+#endif
+
+// Weighted sum of the RGB channels with weights (0.2126, 0.7152, 0.0722),
+// i.e. the Rec. 709 luminance coefficients.
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{
+    const FfxFloat32x3 fLumaWeights = FfxFloat32x3(0.2126f, 0.7152f, 0.0722f);
+    return dot(fLinearRgb, fLumaWeights);
+}
+#if FFXM_HALF
+// Reduced-precision overload of RGBToLuma.
+FFXM_MIN16_F RGBToLuma(FFXM_MIN16_F3 fLinearRgb)
+{
+    const FFXM_MIN16_F3 fLumaWeights = FFXM_MIN16_F3(0.2126f, 0.7152f, 0.0722f);
+    return dot(fLinearRgb, fLumaWeights);
+}
+#endif
+
+// Maps RGBToLuma(fLinearRgb) through a piecewise lightness curve and scales it by 0.01:
+//   L <= 216/24389 : L * 24389/27
+//   otherwise      : 116 * L^(1/3) - 16
+// (these are the constants of the CIE L* lightness formula).
+FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
+{
+    const FfxFloat32 fLuminance = RGBToLuma(fLinearRgb);
+
+    // Linear segment near black.
+    if (fLuminance <= 216.0f / 24389.0f)
+    {
+        return (fLuminance * (24389.0f / 27.0f)) * 0.01f;
+    }
+
+    // Cube-root segment.
+    return (ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f) * 0.01f;
+}
+#if FFXM_HALF
+// Reduced-precision overload of RGBToPerceivedLuma (same curve).
+FFXM_MIN16_F RGBToPerceivedLuma(FFXM_MIN16_F3 fLinearRgb)
+{
+    const FFXM_MIN16_F fLuminance = RGBToLuma(fLinearRgb);
+
+    // Linear segment near black.
+    if (fLuminance <= FFXM_MIN16_F(216.0f / 24389.0f))
+    {
+        return (fLuminance * FFXM_MIN16_F(24389.0f / 27.0f)) * FFXM_MIN16_F(0.01f);
+    }
+
+    // Cube-root segment.
+    return (ffxPow(fLuminance, FFXM_MIN16_F(1.0f / 3.0f)) * FFXM_MIN16_F(116.0f) - FFXM_MIN16_F(16.0f)) * FFXM_MIN16_F(0.01f);
+}
+#endif
+
+// Compresses fRgb by dividing it by (1 + max(0, r, g, b)).
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{
+    const FfxFloat32 fMaxChannel = ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b));
+    const FfxFloat32 fDivisor = fMaxChannel + 1.f;
+    return fRgb / fDivisor.xxx;
+}
+
+// Undoes Tonemap(): divides fRgb by (1 - max(r, g, b)), with the divisor
+// kept at or above FSR2_TONEMAP_EPSILON.
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{
+    const FfxFloat32 fMaxChannel = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
+    const FfxFloat32 fDivisor = ffxMax(FSR2_TONEMAP_EPSILON, 1.f - fMaxChannel);
+    return fRgb / fDivisor.xxx;
+}
+
+#if FFXM_HALF
+// Reduced-precision overload: fRgb / (1 + max(0, r, g, b)).
+FFXM_MIN16_F3 Tonemap(FFXM_MIN16_F3 fRgb)
+{
+    const FFXM_MIN16_F fMaxChannel = ffxMax(ffxMax(FFXM_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b));
+    const FFXM_MIN16_F fDivisor = fMaxChannel + FFXM_MIN16_F(1.f);
+    return fRgb / fDivisor.xxx;
+}
+
+// Reduced-precision overload: fRgb / max(FSR2_TONEMAP_EPSILON, 1 - max(r, g, b)).
+FFXM_MIN16_F3 InverseTonemap(FFXM_MIN16_F3 fRgb)
+{
+    const FFXM_MIN16_F fMaxChannel = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
+    const FFXM_MIN16_F fDivisor = ffxMax(FFXM_MIN16_F(FSR2_TONEMAP_EPSILON), FFXM_MIN16_F(1.f) - fMaxChannel);
+    return fRgb / fDivisor.xxx;
+}
+#endif
+
+// Returns iPxSample + iPxOffset, clamped per axis only in the direction of the offset:
+// a negative offset is clamped at 0, a positive one at iTextureSize - 1.
+// An axis with zero offset is passed through unclamped.
+FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+    FfxInt32x2 iPos = iPxSample + iPxOffset;
+
+    if (iPxOffset.x < 0)
+    {
+        iPos.x = ffxMax(iPos.x, 0);
+    }
+    if (iPxOffset.x > 0)
+    {
+        iPos.x = ffxMin(iPos.x, iTextureSize.x - 1);
+    }
+    if (iPxOffset.y < 0)
+    {
+        iPos.y = ffxMax(iPos.y, 0);
+    }
+    if (iPxOffset.y > 0)
+    {
+        iPos.y = ffxMin(iPos.y, iTextureSize.y - 1);
+    }
+
+    // Unconditional alternative kept for reference:
+    // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1));
+    return iPos;
+}
+#if FFXM_HALF
+// Reduced-precision overload of ClampLoad (same direction-dependent clamp).
+FFXM_MIN16_I2 ClampLoad(FFXM_MIN16_I2 iPxSample, FFXM_MIN16_I2 iPxOffset, FFXM_MIN16_I2 iTextureSize)
+{
+    FFXM_MIN16_I2 iPos = iPxSample + iPxOffset;
+
+    if (iPxOffset.x < 0)
+    {
+        iPos.x = ffxMax(iPos.x, FFXM_MIN16_I(0));
+    }
+    if (iPxOffset.x > 0)
+    {
+        iPos.x = ffxMin(iPos.x, iTextureSize.x - FFXM_MIN16_I(1));
+    }
+    if (iPxOffset.y < 0)
+    {
+        iPos.y = ffxMax(iPos.y, FFXM_MIN16_I(0));
+    }
+    if (iPxOffset.y > 0)
+    {
+        iPos.y = ffxMin(iPos.y, iTextureSize.y - FFXM_MIN16_I(1));
+    }
+
+    // Unconditional alternative kept for reference:
+    // return ffxMed3Half(iPxSample + iPxOffset, FFXM_MIN16_I2(0, 0), iTextureSize - FFXM_MIN16_I2(1, 1));
+    return iPos;
+}
+#endif
+
+// Clamps a UV so the sample stays between the centres of the first and last texel
+// of an iTextureSize region, then re-normalizes against iResourceSize.
+//   fUv           - UV relative to the iTextureSize region
+//   iTextureSize  - size in texels of the valid region
+//   iResourceSize - size in texels used as the divisor for the returned UV
+FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
+{
+    const FfxFloat32x2 fHalfTexel = FfxFloat32x2(0.5f, 0.5f);
+
+    FfxFloat32x2 fLocation = fUv * iTextureSize;
+    fLocation = ffxMin(fLocation, FfxFloat32x2(iTextureSize) - fHalfTexel);
+    fLocation = ffxMax(fHalfTexel, fLocation);
+
+    return fLocation / FfxFloat32x2(iResourceSize);
+}
+
+// True when 0 <= pos < size on both axes. Both operands are converted to
+// unsigned before the comparison, so one less-than test per axis is used.
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{
+    const FfxUInt32x2 uPos = FfxUInt32x2(pos);
+    const FfxUInt32x2 uSize = FfxUInt32x2(size);
+    return all(FFXM_LESS_THAN(uPos, uSize));
+}
+#if FFXM_HALF
+// Reduced-precision overload of IsOnScreen.
+FfxBoolean IsOnScreen(FFXM_MIN16_I2 pos, FFXM_MIN16_I2 size)
+{
+    const FFXM_MIN16_U2 uPos = FFXM_MIN16_U2(pos);
+    const FFXM_MIN16_U2 uSize = FFXM_MIN16_U2(size);
+    return all(FFXM_LESS_THAN(uPos, uSize));
+}
+#endif
+
+// Derives an exposure multiplier from Lavg, which is exponentiated first
+// (callers in this file pass an average of log-luminance values).
+// Returns 1 / Lmax with Lmax = (78 / (q * S)) * 2^log2(exp(Lavg) * S / K),
+// using S = 100, K = 12.5 and q = 0.65.
+FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
+{
+    const FfxFloat32 fLinearAverage = exp(Lavg);
+
+    const FfxFloat32 fIsoSpeed = 100.0f; // ISO arithmetic speed (S)
+    const FfxFloat32 fK = 12.5f;
+    const FfxFloat32 fExposureISO100 = log2((fLinearAverage * fIsoSpeed) / fK);
+
+    const FfxFloat32 fQ = 0.65f;
+    const FfxFloat32 fMaxLuminance = (78.0f / (fQ * fIsoSpeed)) * ffxPow(2.0f, fExposureISO100);
+
+    return 1 / fMaxLuminance;
+}
+#if FFXM_HALF
+// Reduced-precision overload of ComputeAutoExposureFromLavg (same constants).
+FFXM_MIN16_F ComputeAutoExposureFromLavg(FFXM_MIN16_F Lavg)
+{
+    const FFXM_MIN16_F fLinearAverage = exp(Lavg);
+
+    const FFXM_MIN16_F fIsoSpeed = FFXM_MIN16_F(100.0f); // ISO arithmetic speed (S)
+    const FFXM_MIN16_F fK = FFXM_MIN16_F(12.5f);
+    const FFXM_MIN16_F fExposureISO100 = log2((fLinearAverage * fIsoSpeed) / fK);
+
+    const FFXM_MIN16_F fQ = FFXM_MIN16_F(0.65f);
+    const FFXM_MIN16_F fMaxLuminance = (FFXM_MIN16_F(78.0f) / (fQ * fIsoSpeed)) * ffxPow(FFXM_MIN16_F(2.0f), fExposureISO100);
+
+    return FFXM_MIN16_F(1) / fMaxLuminance;
+}
+#endif
+
+// Maps a render-resolution pixel to a display-resolution pixel: takes the pixel
+// centre, subtracts Jitter(), rescales by DisplaySize() / RenderSize() and floors.
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    const FfxFloat32x2 fUnjitteredLrPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
+    const FfxFloat32x2 fHrPos = (fUnjitteredLrPos / RenderSize()) * DisplaySize();
+    return FfxInt32x2(floor(fHrPos));
+}
+#if FFXM_HALF
+// Reduced-precision overload of ComputeHrPosFromLrPos.
+FFXM_MIN16_I2 ComputeHrPosFromLrPos(FFXM_MIN16_I2 iPxLrPos)
+{
+    const FFXM_MIN16_F2 fUnjitteredLrPos = FFXM_MIN16_F2(iPxLrPos) + FFXM_MIN16_F(0.5f) - FFXM_MIN16_F2(Jitter());
+    const FFXM_MIN16_F2 fHrPos = (fUnjitteredLrPos / FFXM_MIN16_F2(RenderSize())) * FFXM_MIN16_F2(DisplaySize());
+    return FFXM_MIN16_I2(floor(fHrPos));
+}
+#endif
+
+// Converts a pixel position to normalized device coordinates:
+// x goes from -1 (at 0) to +1 (at iSize.x), y from +1 (at 0) to -1 (at iSize.y).
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{
+    const FfxFloat32x2 fNormalizedPos = fPxPos / FfxFloat32x2(iSize);
+    return fNormalizedPos * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
+}
+
+// Converts a device depth value to view-space depth as
+//   factors[1] / (fDeviceDepth - factors[0])
+// where factors = DeviceToViewSpaceTransformFactors() (details in ffx_fsr2.cpp).
+FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fDenominator = fDeviceDepth - fFactors[0];
+    return (fFactors[1] / fDenominator);
+}
+
+// GetViewSpaceDepth() scaled by ViewSpaceToMetersFactor().
+FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32 fViewSpaceDepth = GetViewSpaceDepth(fDeviceDepth);
+    return fViewSpaceDepth * ViewSpaceToMetersFactor();
+}
+
+// Reconstructs a view-space position from a viewport pixel and its device depth.
+// Z is GetViewSpaceDepth(fDeviceDepth); X and Y are the NDC coordinates scaled by
+// Z and by components [2] and [3] of DeviceToViewSpaceTransformFactors().
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fViewZ = GetViewSpaceDepth(fDeviceDepth);
+    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
+
+    return FfxFloat32x3(
+        fFactors[2] * fNdc.x * fViewZ,
+        fFactors[3] * fNdc.y * fViewZ,
+        fViewZ);
+}
+
+// GetViewSpacePosition() scaled by ViewSpaceToMetersFactor().
+FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x3 fViewSpacePos = GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth);
+    return fViewSpacePos * ViewSpaceToMetersFactor();
+}
+
+// View-space distance, in meters, of device depth 0 (inverted-depth builds)
+// or device depth 1 (otherwise).
+FfxFloat32 GetMaxDistanceInMeters()
+{
+#if FFXM_FSR2_OPTION_INVERTED_DEPTH
+    const FfxFloat32 fFarthestDeviceDepth = 0.0f;
+#else
+    const FfxFloat32 fFarthestDeviceDepth = 1.0f;
+#endif
+    return GetViewSpaceDepth(fFarthestDeviceDepth) * ViewSpaceToMetersFactor();
+}
+
+// Removes the pre-exposure from fRgb, applies fExposure and clamps the
+// result to [0, FSR2_FP16_MAX].
+FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
+{
+    const FfxFloat32x3 fExposed = (fRgb / fPreExposure) * fExposure;
+    return clamp(fExposed, 0.0f, FSR2_FP16_MAX);
+}
+
+// Reverses PrepareRgb() apart from the clamp: divides by fExposure and
+// multiplies by the global PreExposure().
+FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
+{
+    const FfxFloat32x3 fUnexposed = fRgb / fExposure;
+    return fUnexposed * PreExposure();
+}
+
+#if FFXM_HALF
+// FP16 overload: (fRgb / fPreExposure) * fExposure, clamped to [0, FSR2_FP16_MAX].
+FfxFloat16x3 PrepareRgb(FfxFloat16x3 fRgb, FfxFloat16 fExposure, FfxFloat16 fPreExposure)
+{
+    FfxFloat16x3 fExposed = fRgb;
+    fExposed /= fPreExposure;
+    fExposed *= fExposure;
+    return clamp(fExposed, FfxFloat16(0.0f), FfxFloat16(FSR2_FP16_MAX));
+}
+
+// FP16 overload: (fRgb / fExposure) * PreExposure().
+FfxFloat16x3 UnprepareRgb(FfxFloat16x3 fRgb, FfxFloat16 fExposure)
+{
+    FfxFloat16x3 fUnexposed = fRgb;
+    fUnexposed /= fExposure;
+    fUnexposed *= FfxFloat16(PreExposure());
+    return fUnexposed;
+}
+#endif
+
+// Description of a 2x2 bilinear footprint, filled in by GetBilinearSamplingData().
+struct BilinearSamplingData
+{
+ FfxInt32x2 iOffsets[4]; // texel offsets added to iBasePos for each of the four taps
+ FfxFloat32 fWeights[4]; // bilinear weight of each tap, same order as iOffsets
+ FfxInt32x2 iBasePos; // integer texel position of the first tap
+ FfxFloat32x2 fQuadCenterUv; // UV handed to gather-style fetches of the footprint
+};
+
+// Builds the 2x2 bilinear footprint for sampling at fUv in a texture of iSize texels.
+// The sample position is shifted by half a texel so that iBasePos is the top-left tap
+// and the fractional part yields the four weights (which sum to 1).
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    BilinearSamplingData data;
+
+    const FfxFloat32x2 fTexelPos = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fFrac = ffxFract(fTexelPos);
+
+    data.iBasePos = FfxInt32x2(floor(fTexelPos));
+    data.fQuadCenterUv = fTexelPos / FfxFloat32x2(iSize);
+
+    // Tap order: top-left, top-right, bottom-left, bottom-right.
+    data.iOffsets[0] = FfxInt32x2(0, 0);
+    data.iOffsets[1] = FfxInt32x2(1, 0);
+    data.iOffsets[2] = FfxInt32x2(0, 1);
+    data.iOffsets[3] = FfxInt32x2(1, 1);
+
+    const FfxFloat32 fInvFracX = 1 - fFrac.x;
+    const FfxFloat32 fInvFracY = 1 - fFrac.y;
+
+    data.fWeights[0] = fInvFracX * fInvFracY;
+    data.fWeights[1] = (fFrac.x) * fInvFracY;
+    data.fWeights[2] = fInvFracX * (fFrac.y);
+    data.fWeights[3] = (fFrac.x) * (fFrac.y);
+
+    return data;
+}
+
+// Plane in the form dot(fNormal, p) + fDistanceFromOrigin = 0,
+// as produced by GetPlaneFromPoints().
+struct PlaneData
+{
+ FfxFloat32x3 fNormal; // normalized plane normal
+ FfxFloat32 fDistanceFromOrigin; // -dot(pointOnPlane, fNormal)
+};
+
+// Builds the plane through three points. The normal is
+// normalize(cross(fP0 - fP1, fP0 - fP2)); collinear points are not handled.
+PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
+{
+    const FfxFloat32x3 fEdgeA = fP0 - fP1;
+    const FfxFloat32x3 fEdgeB = fP0 - fP2;
+
+    PlaneData plane;
+    plane.fNormal = normalize(cross(fEdgeA, fEdgeB));
+    plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal);
+    return plane;
+}
+
+// Unsigned distance from fPoint to the plane.
+FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
+{
+    const FfxFloat32 fSignedDistance = dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin;
+    return abs(fSignedDistance);
+}
+
+#endif // #if defined(FFXM_GPU)
+
+#endif //!defined(FFXM_FSR2_COMMON_H)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta
new file mode 100644
index 0000000..723aa76
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_common.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: ecee34f12256cf741857fcb5696b0996
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h
new file mode 100644
index 0000000..eb12bce
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h
@@ -0,0 +1,212 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+// Workgroup-shared copy of the SPD atomic counter value.
+// NOTE(review): initializers on groupshared/shared variables are generally not
+// honoured by shader compilers - confirm nothing relies on the "= 0u" here.
+FFXM_GROUPSHARED FfxUInt32 spdCounter = 0u;
+
+// SPD callback: forwards to SPD_IncreaseAtomicCounter, which receives spdCounter.
+// `slice` is part of the callback signature and is not used here.
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{
+ SPD_IncreaseAtomicCounter(spdCounter);
+}
+
+// SPD callback: returns the value currently held in spdCounter.
+FfxUInt32 SpdGetAtomicCounter()
+{
+ return spdCounter;
+}
+
+// SPD callback: forwards to SPD_ResetAtomicCounter. `slice` is not used here.
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{
+ SPD_ResetAtomicCounter();
+}
+
+#ifndef SPD_PACKED_ONLY
+// Workgroup-shared 16x16 scratch tiles for the FP32 SPD path, one per channel.
+// Accessed through SpdLoadIntermediate() / SpdStoreIntermediate().
+FFXM_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
+FFXM_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
+FFXM_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
+FFXM_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
+
+// SPD source fetch (FP32 path). Samples the input colour at the jittered centre of
+// texel `tex`, removes the pre-exposure and returns log(luma) in .x (yzw = 0).
+// Texels outside RenderSize() return 0 so they add nothing to the average.
+// `slice` is not used.
+FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
+{
+    const FfxFloat32x2 fJitteredUv = (tex + 0.5f + Jitter()) / RenderSize();
+    const FfxFloat32x2 fClampedUv = ClampUv(fJitteredUv, RenderSize(), InputColorResourceDimensions());
+
+    FfxFloat32x3 fColor = SampleInputColor(fClampedUv);
+    fColor /= PreExposure();
+
+    // Luma is floored at FSR2_EPSILON so the logarithm stays finite.
+    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fColor)));
+
+    FfxFloat32 fContribution = 0.0f;
+    if (all(FFXM_LESS_THAN(tex, RenderSize())))
+    {
+        fContribution = fLogLuma;
+    }
+
+    return FfxFloat32x4(fContribution, 0, 0, 0);
+}
+
+// SPD callback: reads texel `tex` through SPD_LoadMipmap5. `slice` is not used.
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
+{
+ return SPD_LoadMipmap5(tex);
+}
+
+// SPD store callback (FP32 path). `slice` is not used.
+//  - Writes outValue.r via SPD_SetMipmap when `index` is LumaMipLevelToUse() or 5.
+//  - On the last mip (index == MipCount() - 1), for pixel (0, 0) only, updates the
+//    exposure buffer with (ComputeAutoExposureFromLavg(result), result).
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+ if (index == LumaMipLevelToUse() || index == 5)
+ {
+ SPD_SetMipmap(pix, index, outValue.r);
+ }
+
+ if (index == MipCount() - 1) { //accumulate on 1x1 level
+
+ if (all(FFXM_EQUAL(pix, FfxInt32x2(0, 0))))
+ {
+ // .y of the exposure buffer holds the previous Lavg (it is written as spdOutput.y below).
+ FfxFloat32 prev = SPD_LoadExposureBuffer().y;
+ FfxFloat32 result = outValue.r;
+
+ if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
+ {
+ // Move from prev towards the new value by a factor of 1 - exp(-dt * rate).
+ FfxFloat32 rate = 1.0f;
+ result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate));
+ // NOTE(review): SpdStoreH has no equivalent clamp, and clamping at 0 discards
+ // negative averages - confirm which of the two paths is intended.
+ result = ffxMax(0.0f, result);
+ }
+ FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result);
+ SPD_SetExposureBuffer(spdOutput);
+ }
+ }
+}
+
+// Reads one RGBA value back from the four per-channel shared-memory tiles.
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{
+    const FfxFloat32 fR = spdIntermediateR[x][y];
+    const FfxFloat32 fG = spdIntermediateG[x][y];
+    const FfxFloat32 fB = spdIntermediateB[x][y];
+    const FfxFloat32 fA = spdIntermediateA[x][y];
+    return FfxFloat32x4(fR, fG, fB, fA);
+}
+// Scatters one RGBA value into the four per-channel shared-memory tiles.
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+// SPD reduction kernel: arithmetic mean of four values.
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{
+    const FfxFloat32x4 fSum = v0 + v1 + v2 + v3;
+    return fSum * 0.25f;
+}
+#endif
+
+// define fetch and store functions Packed
+#if FFXM_HALF
+
+// Workgroup-shared 16x16 scratch tiles for the FP16 SPD path, two channels per tile.
+// Accessed through SpdLoadIntermediateH() / SpdStoreIntermediateH().
+FFXM_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
+FFXM_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
+
+// SPD source fetch (FP16 path). Samples the input colour at the jittered centre of
+// texel `tex`, removes the pre-exposure and returns log(luma) in .x (yzw = 0).
+// Texels outside RenderSize() return 0 so they add nothing to the average.
+// `slice` is not used.
+FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
+{
+    // The sampling UV is kept in FP32. FP16 has an 11-bit significand, which is too
+    // coarse to address individual texels at typical render widths, and rounding the
+    // ClampUv() result back to FP16 can push the coordinate outside the clamped range
+    // again. Only the colour math below runs at reduced precision.
+    FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize();
+    fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions());
+    FfxFloat16x3 fRgb = FfxFloat16x3(SampleInputColor(fUv));
+
+    fRgb /= FfxFloat16(PreExposure());
+
+    // Compute log luma; luma is floored at FSR2_EPSILON so the logarithm stays finite.
+    const FfxFloat16 fLogLuma = FfxFloat16(log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))));
+
+    // Make sure out of screen pixels contribute no value to the end result
+    const FfxFloat16 result = all(FFXM_LESS_THAN(tex, RenderSize())) ? fLogLuma : FfxFloat16(0.0f);
+
+    return FfxFloat16x4(result, 0, 0, 0);
+}
+
+// SPD callback (FP16 path): reads texel `p` through SPD_LoadMipmap5 and converts
+// the result to FfxFloat16x4. `slice` is not used.
+FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
+{
+ return FfxFloat16x4(SPD_LoadMipmap5(p));
+}
+
+// SPD store callback (FP16 path). `slice` is not used.
+//  - Writes outValue.r via SPD_SetMipmap when `index` is LumaMipLevelToUse() or 5.
+//  - On the last mip (index == MipCount() - 1), for pixel (0, 0) only, updates the
+//    exposure buffer with (ComputeAutoExposureFromLavg(result), result).
+void SpdStoreH(FfxInt32x2 pix, FfxFloat16x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+ if (index == LumaMipLevelToUse() || index == 5)
+ {
+ SPD_SetMipmap(pix, index, outValue.r);
+ }
+
+ if (index == MipCount() - 1) { //accumulate on 1x1 level
+
+ if (all(FFXM_EQUAL(pix, FfxInt16x2(0, 0))))
+ {
+ FfxFloat16 result = outValue.r;
+
+ // If running with GLES 3.2, remove the smooth exposure transition.
+#if !FFXM_SHADER_PLATFORM_GLES_3_2
+ // .y of the exposure buffer holds the previous Lavg (it is written as spdOutput.y below).
+ FfxFloat16 prev = FfxFloat16(SPD_LoadExposureBuffer().y);
+ if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
+ {
+ // Move from prev towards the new value by a factor of 1 - exp(-dt * rate).
+ // NOTE(review): the FP32 SpdStore additionally clamps the result at 0 - confirm
+ // whether the two paths are meant to differ.
+ FfxFloat16 rate = FfxFloat16(1.0f);
+ result = FfxFloat16(prev + (result - prev) * (1 - exp(-DeltaTime() * rate)));
+ }
+#endif
+ FfxFloat16x2 spdOutput = FfxFloat16x2(ComputeAutoExposureFromLavg(result), result);
+ SPD_SetExposureBuffer(spdOutput);
+ }
+ }
+}
+
+// Reads one RGBA value back from the two packed (RG / BA) shared-memory tiles.
+FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
+{
+    const FfxFloat16x2 fRG = spdIntermediateRG[x][y];
+    const FfxFloat16x2 fBA = spdIntermediateBA[x][y];
+    return FfxFloat16x4(fRG.x, fRG.y, fBA.x, fBA.y);
+}
+
+// Stores one RGBA value into the two packed (RG / BA) shared-memory tiles.
+void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
+{
+    spdIntermediateRG[x][y] = value.xy;
+    spdIntermediateBA[x][y] = value.zw;
+}
+
+// SPD reduction kernel (FP16): arithmetic mean of four values.
+FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
+{
+    const FfxFloat16x4 fSum = v0 + v1 + v2 + v3;
+    return fSum * FfxFloat16(0.25);
+}
+#endif
+#endif
+
+#include "../spd/ffxm_spd.h"
+
+// Entry point of the luminance-pyramid pass: runs the SPD downsampler included
+// above with this file's Spd* callbacks.
+// FFXM_HALF selects the packed FP16 variant (SpdDownsampleH), otherwise the FP32
+// one (SpdDownsample); both receive identical arguments.
+//   WorkGroupId      - .xy is the workgroup coordinate, .z is passed as the slice
+//   LocalThreadIndex - flattened thread index inside the workgroup
+void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
+{
+#if FFXM_HALF
+ SpdDownsampleH(
+ FfxUInt32x2(WorkGroupId.xy),
+ FfxUInt32(LocalThreadIndex),
+ FfxUInt32(MipCount()),
+ FfxUInt32(NumWorkGroups()),
+ FfxUInt32(WorkGroupId.z),
+ FfxUInt32x2(WorkGroupOffset()));
+#else
+ SpdDownsample(
+ FfxUInt32x2(WorkGroupId.xy),
+ FfxUInt32(LocalThreadIndex),
+ FfxUInt32(MipCount()),
+ FfxUInt32(NumWorkGroups()),
+ FfxUInt32(WorkGroupId.z),
+ FfxUInt32x2(WorkGroupOffset()));
+#endif
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta
new file mode 100644
index 0000000..7ea9408
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_compute_luminance_pyramid.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 53658e1078243f24aa98041b58bf721d
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h
new file mode 100644
index 0000000..0b4a00d
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h
@@ -0,0 +1,349 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_DEPTH_CLIP_H
+#define FFXM_FSR2_DEPTH_CLIP_H
+
+// Can cause some temporal instability
+#define OPT_PREFETCH_PREVDEPTH_WITH_GATHER 0
+
+// Results of the depth-clip pass for one render-resolution pixel (see DepthClip()).
+struct DepthClipOutputs
+{
+ FfxFloat32x4 fTonemapped; // xyz: prepared input colour, w: depth-clip factor
+ FfxFloat32x2 fDilatedReactiveMasks; // x: reactive factor, y: transparency/composition factor
+};
+
+// NOTE(review): not referenced anywhere in this header - confirm it is still needed.
+FFXM_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;
+
+// Compares the current depth against the reconstructed previous-frame depth around
+// fUvSample and returns a factor in [0, 1].
+//
+// The four texels of the bilinear footprint at fUvSample are visited. A texel
+// contributes when it is on screen, its bilinear weight exceeds
+// fReconstructedDepthBilinearWeightThreshold, and the current view-space depth is
+// larger than the previous one. Its contribution is
+//   saturate(requiredSeparation / depthDifference) ^ power
+// and the result is 1 minus the weighted average of those contributions.
+// Returns 0 when no texel contributes.
+//
+//   fUvSample           - UV, relative to RenderSize(), at which to compare
+//   fCurrentDepthSample - device depth of the current pixel
+FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
+{
+    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+
+    FfxFloat32 fDepth = 0.0f;
+    FfxFloat32 fWeightSum = 0.0f;
+
+    // Terms that depend only on the render resolution are computed once instead of
+    // per sample. Note that this length is the diagonal of the render area in pixels.
+    const FfxFloat32 Ksep = 1.37e-05f;
+    const FfxFloat32 fRenderSizeLength = length(FfxFloat32x2(RenderSize()));
+    const FfxFloat32 fResolutionFactor = ffxSaturate(fRenderSizeLength / length(FfxFloat32x2(1920.0f, 1080.0f)));
+    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
+
+#if OPT_PREFETCH_PREVDEPTH_WITH_GATHER
+    FfxFloat32 fDepthSamples[4];
+    GatherReconstructedPreviousDepthRQuad(bilinearInfo.fQuadCenterUv,
+        fDepthSamples[0], fDepthSamples[1], fDepthSamples[2], fDepthSamples[3]);
+#endif
+
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+    {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, RenderSize()))
+        {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold)
+            {
+#if OPT_PREFETCH_PREVDEPTH_WITH_GATHER
+                const FfxFloat32 fPrevDepthSample = fDepthSamples[iSampleIndex];
+#else
+                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
+#endif
+                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
+                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+
+                // Only samples where the current surface is farther than the previous one count.
+                if (fDepthDiff > 0.0f) {
+
+#if FFXM_FSR2_OPTION_INVERTED_DEPTH
+                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
+#else
+                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
+#endif
+
+                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
+
+                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+                    // Ratio of the corner distance to the centre distance at fPlaneDepth.
+                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fRenderSizeLength * fDepthThreshold;
+
+                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+
+    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
+}
+
+// Measures how much the motion vectors in the 3x3 neighbourhood of iPxPos disagree
+// with the centre ("nucleus") vector.
+// Returns saturate(1 - minconvergence) * saturate(fMaxVelocityUv / 0.01), where
+// minconvergence is the smallest dot product between a neighbour and the nucleus,
+// both divided by the running maximum vector length.
+//   iPxPos                   - pixel position in the motion-vector texture
+//   iPxInputMotionVectorSize - size used to turn iPxPos into a gather UV
+FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
+{
+ FfxFloat32 minconvergence = 1.0f;
+
+ FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
+ FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
+ FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);
+
+ const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;
+
+
+ const FfxFloat32x2 fMVSize = FfxFloat32x2(iPxInputMotionVectorSize);
+ FfxFloat32x2 fPxBaseUv = FfxFloat32x2(iPxPos) / fMVSize;
+ FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fMVSize;
+
+ // The 3x3 neighbourhood is stored row-major: index = (y + 1) * 3 + (x + 1).
+ // Two gathers fill entries 0, 1, 3, 4 and 5, 7, 8 (the first output of the second
+ // gather goes to fTmpDummy); entries 2 and 6 are fetched with plain loads.
+ // The fetches are issued even when the nucleus velocity is below the epsilon.
+ FFXM_MIN16_F2 fMotionVectorSamples[9];
+ FFXM_MIN16_F2 fTmpDummy = FFXM_MIN16_F2(0.0f, 0.0f);
+ GatherInputMotionVectorRGQuad(fPxBaseUv,
+ fMotionVectorSamples[0], fMotionVectorSamples[1],
+ fMotionVectorSamples[3], fMotionVectorSamples[4]);
+ GatherInputMotionVectorRGQuad(fUnitUv + fPxBaseUv,
+ fTmpDummy, fMotionVectorSamples[5],
+ fMotionVectorSamples[7], fMotionVectorSamples[8]);
+ fMotionVectorSamples[2] = LoadInputMotionVector(iPxPos + FfxInt32x2(1, -1));
+ fMotionVectorSamples[6] = LoadInputMotionVector(iPxPos + FfxInt32x2(-1, 1));
+
+ if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
+ for (FfxInt32 y = -1; y <= 1; ++y)
+ {
+ for (FfxInt32 x = -1; x <= 1; ++x)
+ {
+ FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+
+ FfxFloat32x2 fMotionVector = fMotionVectorSamples[sampleIdx]; //LoadInputMotionVector(sp);
+ FfxFloat32 fVelocityUv = length(fMotionVector);
+
+ // After these two lines fVelocityUv equals the running maximum, which is
+ // non-zero here because the nucleus itself passed the epsilon test.
+ fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+ fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+ minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
+ }
+ }
+ }
+
+ return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
+}
+
+// Relative spread of the dilated depth (in meters) over the 3x3 neighbourhood of
+// iPxPos: returns 1 - min / max, forced to 0 when any sample sits exactly at
+// GetMaxDistanceInMeters().
+// Off-screen neighbours are counted with a depth of 0.
+FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
+{
+ const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
+ FfxFloat32 fDepthMax = 0.0f;
+ FfxFloat32 fDepthMin = fMaxDistInMeters;
+
+ FfxInt32 iMaxDistFound = 0;
+
+ FfxInt32x2 iRenderSize = RenderSize();
+ const FfxFloat32x2 fRenderSize = FfxFloat32x2(iRenderSize);
+ FfxFloat32x2 fPxPosBase = FfxFloat32x2(iPxPos) / fRenderSize;
+ FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fRenderSize;
+
+ // The 3x3 neighbourhood is stored row-major: index = (y + 1) * 3 + (x + 1).
+ // Two gathers fill entries 0, 1, 3, 4 and 5, 7, 8 (the first output of the second
+ // gather goes to fTmpDummy); entries 2 and 6 are fetched with plain loads.
+ FfxFloat32 fDilatedDepthSamples[9];
+ FfxFloat32 fTmpDummy = 0.0f;
+ GatherDilatedDepthRQuad(fPxPosBase,
+ fDilatedDepthSamples[0], fDilatedDepthSamples[1],
+ fDilatedDepthSamples[3], fDilatedDepthSamples[4]);
+ GatherDilatedDepthRQuad(fUnitUv + fPxPosBase,
+ fTmpDummy, fDilatedDepthSamples[5],
+ fDilatedDepthSamples[7], fDilatedDepthSamples[8]);
+ fDilatedDepthSamples[2] = LoadDilatedDepth(iPxPos + FfxInt32x2(1, -1));
+ fDilatedDepthSamples[6] = LoadDilatedDepth(iPxPos + FfxInt32x2(-1, 1));
+
+ for (FfxInt32 y = -1; y < 2; y++)
+ {
+ for (FfxInt32 x = -1; x < 2; x++)
+ {
+ FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+ const FfxInt32x2 iOffset = FfxInt32x2(x, y);
+ const FfxInt32x2 iSamplePos = iPxPos + iOffset;
+
+ const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, iRenderSize) ? 1.0f : 0.0f;
+ // FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor;
+ FfxFloat32 fDepth = GetViewSpaceDepthInMeters(fDilatedDepthSamples[sampleIdx]) * fOnScreenFactor;
+
+ // Exact float comparison: set when a sample lies precisely at the maximum distance.
+ iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth);
+
+ fDepthMin = ffxMin(fDepthMin, fDepth);
+ fDepthMax = ffxMax(fDepthMax, fDepth);
+ }
+ }
+
+ // NOTE(review): if every sample depth is 0, fDepthMax stays 0 and this divides
+ // 0 by 0 - confirm callers never hit that case.
+ return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f);
+}
+
+// Compares the dilated motion vector of iPxPos with the previous frame's dilated
+// motion vector at the reprojected location.
+// Returns 0 unless the motion exceeds one display pixel; otherwise
+//   (1 - saturate(|prev| / |current|)) * saturate((pixels / 20)^3).
+FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32x2 fPixelCenterUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();
+    const FfxFloat32x2 fCurrentMotion = LoadDilatedMotionVector(iPxPos);
+
+    // Follow the motion vector and keep the lookup inside the valid render area.
+    const FfxFloat32x2 fPreviousUv = ClampUv(fPixelCenterUv + fCurrentMotion, RenderSize(), MaxRenderSize());
+    const FfxFloat32x2 fPreviousMotion = SamplePreviousDilatedMotionVector(fPreviousUv);
+
+    float fPxDistance = length(fCurrentMotion * DisplaySize());
+    if (!(fPxDistance > 1.0f))
+    {
+        return 0;
+    }
+
+    const FfxFloat32 fLengthMismatch = 1.0f - ffxSaturate(length(fPreviousMotion) / length(fCurrentMotion));
+    const FfxFloat32 fDistanceWeight = ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f));
+    return ffxLerp(0.0f, fLengthMismatch, fDistanceWeight);
+}
+
+// Dilates the reactive and transparency/composition masks over the 3x3 neighbourhood
+// of iPxLrPos and writes the result to results.fDilatedReactiveMasks.
+//   x starts at 0 and y at fMotionDivergence. When any mask value in the neighbourhood
+//   is non-zero, each neighbour's mask values are raised to a power that grows as its
+//   colour differs from the centre colour, and the per-channel maximum is kept.
+void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence, FFXM_PARAMETER_INOUT DepthClipOutputs results)
+{
+ // Compensate for bilinear sampling in accumulation pass
+
+ const FfxInt32x2 iRenderSize = RenderSize();
+ const FfxFloat32x2 fRenderSize = FfxFloat32x2(iRenderSize);
+ FfxFloat32x2 fPxPosBase = FfxFloat32x2(iPxLrPos) / fRenderSize;
+ FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fRenderSize;
+
+ FFXM_MIN16_F2 fReactiveFactor = FFXM_MIN16_F2(0.0f, fMotionDivergence);
+ FFXM_MIN16_F fMasksSum = FFXM_MIN16_F(0.0f);
+
+ // All 3x3 arrays below are row-major: index = (y + 1) * 3 + (x + 1). Two gathers fill
+ // entries 0, 1, 3, 4 and 5, 7, 8 (the first output of the second gather is discarded
+ // into fTmpDummy); entries 2 and 6 are fetched with plain loads.
+ FFXM_MIN16_F fTmpDummy = FFXM_MIN16_F(0.0f);
+ // Reactive samples
+ FFXM_MIN16_F fReactiveSamples[9];
+ GatherReactiveRQuad(fPxPosBase,
+ fReactiveSamples[0], fReactiveSamples[1],
+ fReactiveSamples[3], fReactiveSamples[4]);
+ GatherReactiveRQuad(fUnitUv + fPxPosBase,
+ fTmpDummy, fReactiveSamples[5],
+ fReactiveSamples[7], fReactiveSamples[8]);
+ fReactiveSamples[2] = FFXM_MIN16_F(LoadReactiveMask(iPxLrPos + FfxInt32x2(1, -1)));
+ fReactiveSamples[6] = FFXM_MIN16_F(LoadReactiveMask(iPxLrPos + FfxInt32x2(-1, 1)));
+
+ // Transparency and composition mask samples
+ FFXM_MIN16_F fTransparencyAndCompositionSamples[9];
+ GatherTransparencyAndCompositionMaskRQuad(fPxPosBase,
+ fTransparencyAndCompositionSamples[0], fTransparencyAndCompositionSamples[1],
+ fTransparencyAndCompositionSamples[3], fTransparencyAndCompositionSamples[4]);
+ GatherTransparencyAndCompositionMaskRQuad(fUnitUv + fPxPosBase,
+ fTmpDummy, fTransparencyAndCompositionSamples[5],
+ fTransparencyAndCompositionSamples[7], fTransparencyAndCompositionSamples[8]);
+ fTransparencyAndCompositionSamples[2] = FFXM_MIN16_F(LoadTransparencyAndCompositionMask(iPxLrPos + FfxInt32x2(1, -1)));
+ fTransparencyAndCompositionSamples[6] = FFXM_MIN16_F(LoadTransparencyAndCompositionMask(iPxLrPos + FfxInt32x2(-1, 1)));
+
+ // Sum of both masks over the neighbourhood; used only to skip the colour work below.
+ FFXM_UNROLL
+ for (FfxInt32 y = -1; y < 2; y++)
+ {
+ FFXM_UNROLL
+ for (FfxInt32 x = -1; x < 2; x++)
+ {
+ FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+ fMasksSum += (fReactiveSamples[sampleIdx] + fTransparencyAndCompositionSamples[sampleIdx]);
+ }
+ }
+
+ if (fMasksSum > FFXM_MIN16_F(0))
+ {
+ // The colour gather UV is normalized by the input colour resource size,
+ // not by the render size used for the mask gathers above.
+ const FfxFloat32x2 InputColorSize = FfxFloat32x2(InputColorResourceDimensions());
+ FfxFloat32x2 Base = FfxFloat32x2(iPxLrPos) / InputColorSize;
+ FFXM_MIN16_F3 fInputColorSamples[9];
+ // Input color samples: one gather for entries 0, 1, 3, 4, plain loads for the rest.
+ GatherInputColorRGBQuad(Base,
+ fInputColorSamples[0], fInputColorSamples[1], fInputColorSamples[3], fInputColorSamples[4]);
+ fInputColorSamples[2] = LoadInputColor(iPxLrPos + FfxInt32x2(1, -1));
+ fInputColorSamples[5] = LoadInputColor(iPxLrPos + FfxInt32x2(1, 0) );
+ fInputColorSamples[6] = LoadInputColor(iPxLrPos + FfxInt32x2(-1, 1));
+ fInputColorSamples[7] = LoadInputColor(iPxLrPos + FfxInt32x2(0, 1) );
+ fInputColorSamples[8] = LoadInputColor(iPxLrPos + FfxInt32x2(1, 1) );
+
+ // Entry 4 is the centre pixel.
+ FFXM_MIN16_F3 fReferenceColor = fInputColorSamples[4];
+
+ for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
+ {
+ FFXM_MIN16_F3 fColorSample = fInputColorSamples[sampleIdx];
+ FFXM_MIN16_F fReactiveSample = fReactiveSamples[sampleIdx];
+ FFXM_MIN16_F fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];
+
+ // Similarity = dot(ref, sample) / max(|ref|^2, |sample|^2).
+ // NOTE(review): both colours black gives 0 / 0 here - confirm this is benign.
+ const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
+ const FFXM_MIN16_F fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;
+
+ // Increase power for non-similar samples
+ const FFXM_MIN16_F fPowerBiasMax = FFXM_MIN16_F(6.0f);
+ const FFXM_MIN16_F fSimilarityPower = FFXM_MIN16_F(1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax));
+ const FFXM_MIN16_F fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
+ const FFXM_MIN16_F fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);
+
+ fReactiveFactor = ffxMax(fReactiveFactor, FFXM_MIN16_F2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
+ }
+ }
+
+ results.fDilatedReactiveMasks = fReactiveFactor;
+}
+
+// Loads the input colour at iPxLrPos, clamps negative channels to 0, applies
+// PrepareRgb() with Exposure() and PreExposure(), and returns it either tonemapped
+// (FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR) or as YCoCg.
+// Linear input is assumed; non-linear input (sRGB, ...) must be converted to linear
+// first and back again on output.
+FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
+{
+    const FfxFloat32x3 fNonNegativeRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
+    const FfxFloat32x3 fExposedRgb = PrepareRgb(fNonNegativeRgb, Exposure(), PreExposure());
+
+#if FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    return Tonemap(fExposedRgb);
+#else
+    return RGBToYCoCg(fExposedRgb);
+#endif
+}
+
+// Inspects the reconstructed previous depth of the pixel above, at and below iPxPos.
+// Returns 0 when the view-space depth drops by more than 1% at both vertical steps
+// (above -> centre and centre -> below), 1 otherwise.
+// fMotionVector is not used.
+FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
+{
+    const FfxFloat32 fDepthAbove  = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
+    const FfxFloat32 fDepthCenter = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
+    const FfxFloat32 fDepthBelow  = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));
+
+    const FfxBoolean bUpperStep = (fDepthAbove - fDepthCenter) > (fDepthCenter * 0.01f);
+    const FfxBoolean bLowerStep = (fDepthCenter - fDepthBelow) > (fDepthBelow * 0.01f);
+
+    return (bUpperStep && bLowerStep) ? 0.0f : 1.0f;
+}
+
+// Depth-clip pass body for one render-resolution pixel.
+// Produces:
+//   fTonemapped.xyz       - prepared input colour (ComputePreparedInputColor)
+//   fTonemapped.w         - ComputeDepthClip() at the motion-reprojected UV,
+//                           multiplied by EvaluateSurface()
+//   fDilatedReactiveMasks - dilated reactive masks (PreProcessReactiveMasks), seeded
+//                           with the larger of the spatial and temporal motion divergence
+DepthClipOutputs DepthClip(FfxInt32x2 iPxPos)
+{
+    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
+    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
+
+    // Discard tiny mvs
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);
+
+    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
+    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
+    // The input depth is not needed here: only the dilated depth takes part in the
+    // comparison, so the unused LoadInputDepth() fetch was removed.
+
+    DepthClipOutputs results;
+
+    // Compute prepared input color and depth clip
+    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
+    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
+    results.fTonemapped = FfxFloat32x4(fPreparedYCoCg, fDepthClip);
+
+    // Compute dilated reactive mask
+#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxInt32x2 iSamplePos = iPxPos;
+#else
+    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
+#endif
+
+    // NOTE(review): with display-resolution motion vectors iSamplePos is a display-space
+    // position, yet RenderSize() is passed as the motion-vector texture size - confirm.
+    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
+    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));
+
+    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence), results);
+
+    return results;
+}
+
+#endif //!defined( FFXM_FSR2_DEPTH_CLIP_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta
new file mode 100644
index 0000000..c35e41a
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_depth_clip.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 6578e7c7d02073e48926d1974b4d6c92
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h
new file mode 100644
index 0000000..6725573
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h
@@ -0,0 +1,131 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_LOCK_H
+#define FFXM_FSR2_LOCK_H
+
+// Resets the reconstructed-depth entry at iPxHrPos to the "far Z" bit pattern; positions outside RenderSize() are skipped.
+void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
+{
+    if (!all(FFXM_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) return;
+
+#if FFXM_FSR2_OPTION_INVERTED_DEPTH
+    const FfxUInt32 uFarDepthBits = 0x0;
+#else
+    const FfxUInt32 uFarDepthBits = 0x3f800000; // IEEE-754 bit pattern of 1.0f
+#endif
+    SetReconstructedDepth(iPxHrPos, uFarDepthBits);
+}
+
+FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) // Returns true when the lock-input luma at pos is a thin ridge within its 3x3 neighbourhood.
+{
+ const FfxInt32 RADIUS = 1;
+ // Centre sample ("nucleus") that each of the eight neighbours is compared against.
+ FFXM_MIN16_F fNucleus = LoadLockInputLuma(pos);
+ // A neighbour is "similar" when max/min of its luma and the nucleus is below similar_threshold; all others feed the dissimilar min/max range.
+ FFXM_MIN16_F similar_threshold = FFXM_MIN16_F(1.05f);
+ FFXM_MIN16_F dissimilarLumaMin = FFXM_MIN16_F(FSR2_FP16_MAX);
+ FFXM_MIN16_F dissimilarLumaMax = FFXM_MIN16_F(0);
+
+ /* Bit / lumaSamples index layout of the 3x3 window (4 is the nucleus):
+ 0 1 2
+ 3 4 5
+ 6 7 8
+ */
+
+ #define SETBIT(x) (1U << x) // NOTE(review): never #undef'd and x is not parenthesised; only simple arguments are passed below.
+
+ FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar
+ // Each rejection mask covers one 2x2 quad of the 3x3 window; all four quads contain the nucleus (bit 4).
+ const FfxUInt32 uNumRejectionMasks = 4;
+ const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
+ SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
+ SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
+ SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
+ SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
+ };
+
+ FFXM_MIN16_F lumaSamples [9];
+ FFXM_MIN16_F fTmpDummy = FFXM_MIN16_F(0.0f); // receives the gather output that has no slot of its own in lumaSamples
+ const FfxFloat32x2 fInputLumaSize = FfxFloat32x2(MaxRenderSize());
+ const FfxFloat32x2 fPxBaseUv = FfxFloat32x2(pos) / fInputLumaSize;
+ const FfxFloat32x2 fUnitUv = FfxFloat32x2(1.0f, 1.0f) / fInputLumaSize;
+
+ // Gather samples: two quad gathers fill slots 0,1,3,4 and 5,7,8; slots 2 and 6 are loaded individually.
+ GatherLockInputLumaRQuad(fPxBaseUv,
+ lumaSamples[0], lumaSamples[1],
+ lumaSamples[3], lumaSamples[4]);
+ GatherLockInputLumaRQuad(fUnitUv + fPxBaseUv,
+ fTmpDummy, lumaSamples[5],
+ lumaSamples[7], lumaSamples[8]);
+ lumaSamples[2] = LoadLockInputLuma(pos + FfxInt32x2(1, -1));
+ lumaSamples[6] = LoadLockInputLuma(pos + FfxInt32x2(-1, 1));
+
+ FfxInt32 idx = 0;
+ FFXM_UNROLL
+ for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
+ FFXM_UNROLL
+ for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
+ if (x == 0 && y == 0) continue; // the nucleus bit is already set in mask; idx still advances via the loop increment
+
+ FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+ FFXM_MIN16_F sampleLuma = lumaSamples[sampleIdx];
+
+ FFXM_MIN16_F difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); // larger / smaller luma ratio
+ // NOTE(review): the "> 0" test presumably sends NaN ratios (e.g. 0/0) to the dissimilar branch; confirm.
+ if (difference > FFXM_MIN16_F(0) && (difference < similar_threshold)) {
+ mask |= SETBIT(idx);
+ } else {
+ dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
+ dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
+ }
+ }
+ }
+ // Ridge test: the nucleus is strictly above the largest or strictly below the smallest dissimilar neighbour.
+ FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;
+
+ if (FFXM_FALSE == isRidge) {
+
+ return false;
+ }
+ // Reject when any 2x2 quad around the nucleus consists only of similar samples.
+ FFXM_UNROLL
+ for (FfxInt32 i = 0; i < 4; i++) {
+
+ if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Stores a new lock (value 1.f) at the high-resolution position of iPxLrPos when ComputeThinFeatureConfidence() accepts the pixel.
+void ComputeLock(FfxInt32x2 iPxLrPos)
+{
+    const FfxBoolean bIsThinFeature = ComputeThinFeatureConfidence(iPxLrPos);
+    if (bIsThinFeature) {
+        StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
+    }
+    // NOTE(review): the ClearResourcesForNextFrame(iPxLrPos) call is disabled in this function; confirm the reset happens elsewhere.
+}
+
+#endif // FFXM_FSR2_LOCK_H
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta
new file mode 100644
index 0000000..f399aac
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_lock.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 920d5b937231132469bcb0f2a38d2d80
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h
new file mode 100644
index 0000000..eab63d3
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h
@@ -0,0 +1,101 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H
+#define FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H
+
+FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) // Texel fetch: luma from the LumaMipLevelToUse() mip in .x, zeros in .yzw.
+{
+ return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+// Min16 overload of the same fetch; only compiled when FFXM_HALF is enabled.
+#if FFXM_HALF
+FFXM_MIN16_F4 WrapShadingChangeLuma(FFXM_MIN16_I2 iPxSample)
+{
+ return FFXM_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+#endif
+
+#if FFXM_HALF // Declares FetchShadingChangeLumaSamples / ShadingChangeLumaSample on top of WrapShadingChangeLuma; the variant depends on FFXM_HALF.
+DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+DeclareCustomTextureSampleMin16(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) // half path: Min16 macros with the Bilinear argument
+#else
+DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) // full-precision path: bicubic sample fetch with the Lanczos2 argument
+#endif
+
+// Returns (Exposure() * exp(sampled mip luma)) ^ (1/6) for the clamped fUvCoord, using the LumaMipLevelToUse() mip.
+FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
+{
+    // iPxHrPos is not read by this function; only the UV coordinate is used.
+    const FfxFloat32 fMipDivisor = FfxFloat32(FfxInt32(2) << LumaMipLevelToUse());
+    FfxInt32x2 iMipExtent = FfxInt32x2(RenderSize() / fMipDivisor);
+    FfxFloat32x2 fClampedUv = ClampUv(fUvCoord, iMipExtent, LumaMipDimensions());
+
+    // The sample goes through exp() before exposure is applied (presumably the mip stores log-luma).
+    FfxFloat32 fExposedLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fClampedUv, LumaMipLevelToUse())));
+
+    return ffxPow(fExposedLuma, 1.0f / 6.0f);
+}
+
+// Updates the two-component lock state of one pixel for this frame and derives the lock's contribution.
+//   fReactiveFactor (in/out): may be raised once the luma difference exceeds 0.1; fLockStatus (in/out): lifetime and temporal luma;
+//   fLockContributionThisFrame (out): saturated to [0, 1]; fLuminanceDiff (out): 1 - min/max of stored versus current luma.
+void UpdateLockStatus(const AccumulationPassCommonParams params,
+    FFXM_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
+    FFXM_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
+    FFXM_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
+    FFXM_PARAMETER_OUT FfxFloat32 fLuminanceDiff)
+{
+    const FfxFloat32 fCurrentLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
+
+    // A stored temporal luma of exactly zero is seeded with the current value.
+    if (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) {
+        fLockStatus[LOCK_TEMPORAL_LUMA] = fCurrentLuma;
+    }
+
+    FfxFloat32 fStoredLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
+    fLuminanceDiff = 1.0f - MinDividedByMax(fStoredLuma, fCurrentLuma);
+
+    if (state.NewLock) {
+        // New lock: adopt the current luma; lifetime becomes 2 if one was already running, otherwise 1.
+        fLockStatus[LOCK_TEMPORAL_LUMA] = fCurrentLuma;
+        fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
+    } else if (fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
+        // Lifetime of at most 1: move the stored luma half way towards the current one.
+        fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fCurrentLuma), 0.5f);
+    } else if (fLuminanceDiff > 0.1f) {
+        KillLock(fLockStatus);
+    }
+
+    // Scale the remaining lifetime by reactivity and accumulation mask; a depth-clip factor of 0.1 or more zeroes it.
+    fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
+
+    // Compute this frame lock contribution
+    const FfxFloat32 fLifetimeTerm = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
+    const FfxFloat32 fLumaStabilityTerm = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fCurrentLuma));
+    fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeTerm * 4.0f) * fLumaStabilityTerm);
+}
+
+#endif //!defined( FFXM_FSR2_POSTPROCESS_LOCK_STATUS_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta
new file mode 100644
index 0000000..b5dce57
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_postprocess_lock_status.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 976d18e7892c5c444bbcb4d17322fefb
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h
new file mode 100644
index 0000000..a0c5e5f
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h
@@ -0,0 +1,91 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#define GROUP_SIZE 8 // NOTE(review): not referenced in this header; presumably the thread-group edge length expected by the dispatching pass. Confirm.
+#define FSR_RCAS_DENOISE 1 // Defined before ../fsr1/ffxm_fsr1.h is included; presumably selects the RCAS denoise variant there. Confirm.
+
+#include "../ffxm_core.h"
+
+struct RCASOutputs // Result of the RCAS pass for one pixel.
+{
+ FfxFloat32x3 fUpscaledColor; // colour assigned by CurrFilter() after UnprepareRgb()
+};
+
+// USE_FSR_RCASH mirrors FFXM_HALF and selects which pair of RCAS callbacks (H = half, F = float) is defined below.
+#if FFXM_HALF
+#define USE_FSR_RCASH 1
+#else
+#define USE_FSR_RCASH 0
+#endif
+
+#if USE_FSR_RCASH
+#define FSR_RCAS_H 1
+// Half-precision input fetch: loads the RCAS input texel and runs its rgb through PrepareRgb().
+FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p)
+{
+    FfxFloat16x4 fTexel = LoadRCAS_Input(p);
+    fTexel.rgb = FfxFloat16x3(PrepareRgb(fTexel.rgb, Exposure(), PreExposure()));
+    return fTexel;
+}
+// Input hook with an empty body: r, g and b are left unchanged.
+void FsrRcasInputH(inout FfxFloat16 r, inout FfxFloat16 g, inout FfxFloat16 b) {}
+
+#else
+#define FSR_RCAS_F 1
+// Full-precision input fetch: loads the RCAS input texel and runs its rgb through PrepareRgb().
+FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
+{
+    FfxFloat32x4 fTexel = LoadRCAS_Input(p);
+    fTexel.rgb = PrepareRgb(fTexel.rgb, Exposure(), PreExposure());
+    return fTexel;
+}
+// Input hook with an empty body: r, g and b are left unchanged.
+void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {}
+#endif
+
+#include "../fsr1/ffxm_fsr1.h"
+
+void CurrFilter(FFXM_MIN16_U2 pos, FFXM_PARAMETER_INOUT RCASOutputs results) // Runs RCAS at pos and stores the colour in results.fUpscaledColor.
+{
+#if USE_FSR_RCASH
+ FfxFloat16x3 c;
+ FsrRcasH(c.r, c.g, c.b, pos, RCASConfig());
+ // Half path: presumably the inverse of the PrepareRgb() applied in FsrRcasLoadH; confirm.
+ c = UnprepareRgb(c, FfxFloat16(Exposure()));
+#else
+ FfxFloat32x3 c;
+ FsrRcasF(c.r, c.g, c.b, pos, RCASConfig());
+ // Float path: presumably the inverse of the PrepareRgb() applied in FsrRcasLoadF; confirm.
+ c = UnprepareRgb(c, Exposure());
+#endif
+ results.fUpscaledColor = c;
+}
+
+RCASOutputs RCAS(FfxUInt32x2 gxy) // Entry point: runs CurrFilter() for the pixel at gxy and returns the filled RCASOutputs.
+{
+#ifdef FFXM_HLSL
+ RCASOutputs results = (RCASOutputs)0; // HLSL build: zero-initialised through a cast
+#else
+ RCASOutputs results; // other builds: declared without initialiser; CurrFilter() assigns fUpscaledColor
+#endif
+ CurrFilter(FFXM_MIN16_U2(gxy), results);
+ return results;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta
new file mode 100644
index 0000000..73e1f49
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_rcas.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 6113e44b0d068db4c954804a6ce38739
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
new file mode 100644
index 0000000..59bf246
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,155 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+#define FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+
+struct ReconstructPrevDepthOutputs // Values returned by ReconstructAndDilate() for one pixel.
+{
+ FfxFloat32 fDepth; // dilated depth produced by FindNearestDepth()
+ FfxFloat32x2 fMotionVector; // motion vector loaded at the nearest-depth position
+ FfxFloat32 fLuma; // lock input luma from ComputeLockInputLuma()
+};
+
+
+// Stores fDepth into the reconstructed depth at the bilinear taps around the reprojected position of iPxPos.
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
+{
+    // Motion that is not longer than 0.1 display pixels is zeroed.
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
+
+    FfxFloat32x2 fReprojectedUv = ((iPxPos + FfxFloat32(0.5)) / iPxDepthSize) + fMotionVector;
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+    // Project current depth into previous frame locations: every tap with enough bilinear weight receives it.
+    for (FfxInt32 iTap = 0; iTap < 4; iTap++)
+    {
+        // Skip taps whose weight does not exceed the threshold.
+        if (!(bilinearInfo.fWeights[iTap] > fReconstructedDepthBilinearWeightThreshold))
+        {
+            continue;
+        }
+
+        FfxInt32x2 iStorePos = bilinearInfo.iBasePos + bilinearInfo.iOffsets[iTap];
+        if (IsOnScreen(iStorePos, iPxDepthSize))
+        {
+            StoreReconstructedDepth(iStorePos, fDepth);
+        }
+    }
+}
+
+void FindNearestDepth(FFXM_PARAMETER_IN FfxInt32x2 iPxPos, FFXM_PARAMETER_IN FfxInt32x2 iPxSize, FFXM_PARAMETER_OUT FfxFloat32 fNearestDepth, FFXM_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) // Finds the nearest depth in the 3x3 window around iPxPos.
+{
+ const FfxInt32 iSampleCount = 9;
+ const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+ FfxInt32x2(+0, +0),
+ FfxInt32x2(+1, +0),
+ FfxInt32x2(+0, +1),
+ FfxInt32x2(+0, -1),
+ FfxInt32x2(-1, +0),
+ FfxInt32x2(-1, +1),
+ FfxInt32x2(+1, +1),
+ FfxInt32x2(-1, -1),
+ FfxInt32x2(+1, -1),
+ };
+ // Outputs: fNearestDepth is the selected depth, fNearestDepthCoord its integer pixel position (despite the f prefix).
+ // pull out the depth loads to allow SC to batch them
+ FfxFloat32 depth[9];
+ FfxInt32 iSampleIndex = 0;
+ FFXM_UNROLL
+ for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+ // All nine taps are loaded here without an on-screen test; off-screen taps are filtered in the second loop.
+ FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+ depth[iSampleIndex] = LoadInputDepth(iPos);
+ }
+
+ // find closest depth, starting from the centre tap (index 0), which is accepted without an on-screen test
+ fNearestDepthCoord = iPxPos;
+ fNearestDepth = depth[0];
+ FFXM_UNROLL
+ for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
+
+ FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+ if (IsOnScreen(iPos, iPxSize)) {
+ // "Nearest" is the larger value with inverted depth and the smaller value otherwise.
+ FfxFloat32 fNdDepth = depth[iSampleIndex];
+#if FFXM_FSR2_OPTION_INVERTED_DEPTH
+ if (fNdDepth > fNearestDepth) {
+#else
+ if (fNdDepth < fNearestDepth) {
+#endif
+ fNearestDepthCoord = iPos;
+ fNearestDepth = fNdDepth;
+ }
+ }
+ }
+}
+
+// Computes the lock input luma for the input colour at iPxLrPos:
+// clamp to non-negative, re-expose, optionally tonemap, then perceived luma raised to the power 1/6.
+FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
+{
+    // We assume linear data. If the input is non-linear (sRGB, ...),
+    // it should be converted to linear first and back to sRGB on output.
+    FfxFloat32x3 fColor = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
+
+    // Use internal auto exposure for locking logic: divide out PreExposure(), then apply Exposure().
+    fColor = fColor / PreExposure();
+    fColor = fColor * Exposure();
+
+#if FFXM_FSR2_OPTION_HDR_COLOR_INPUT
+    fColor = Tonemap(fColor);
+#endif
+
+    // compute luma used to lock pixels, if used elsewhere the ffxPow must be moved!
+    return ffxPow(RGBToPerceivedLuma(fColor), FfxFloat32(1.0 / 6.0));
+}
+
+// Per-pixel entry point: finds the nearest depth around iPxLrPos, loads the motion vector at that position,
+// stores the reconstructed depth via ReconstructPrevDepth() and returns depth, motion vector and lock input luma.
+ReconstructPrevDepthOutputs ReconstructAndDilate(FfxInt32x2 iPxLrPos)
+{
+    FfxFloat32 fDilatedDepth;
+    FfxInt32x2 iNearestDepthCoord;
+
+    FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
+
+    // With low-resolution motion vectors the nearest-depth coordinate is used as is; otherwise it is mapped through ComputeHrPosFromLrPos().
+#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
+#else
+    FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
+#endif
+
+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
+
+    ReconstructPrevDepthOutputs results;
+    results.fDepth = fDilatedDepth;
+    results.fMotionVector = fDilatedMotionVector;
+    // The reconstructed-depth store runs before the luma is computed, matching the original call order.
+    ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
+    results.fLuma = ComputeLockInputLuma(iPxLrPos);
+
+    return results;
+}
+
+
+#endif //!defined( FFXM_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta
new file mode 100644
index 0000000..945e0ce
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 5e29326796d407b41b4d8a450bbb8fac
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h
new file mode 100644
index 0000000..b8b4c24
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h
@@ -0,0 +1,390 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_REPROJECT_H
+#define FFXM_FSR2_REPROJECT_H
+
+#if FFXM_HALF
+FFXM_MIN16_F4 WrapHistory(FFXM_MIN16_I2 iPxSample) // min16 variant: LoadHistory result at integer pixel iPxSample, converted to FFXM_MIN16_F4
+{
+ return FFXM_MIN16_F4(LoadHistory(iPxSample));
+}
+FFXM_MIN16_F4 SampleHistory(FfxFloat32x2 fUV) // min16 variant: forwards fUV to SampleUpscaledHistory (declared outside this file)
+{
+ return SampleUpscaledHistory(fUV);
+}
+#else
+FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) // fp32 variant: LoadHistory result at integer pixel iPxSample
+{
+ return LoadHistory(iPxSample);
+}
+FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) // fp32 variant: forwards fUV to SampleUpscaledHistory (declared outside this file)
+{
+ return SampleUpscaledHistory(fUV);
+}
+#endif
+
+
+#if FFXM_HALF
+
+#define FFXM_FSR2_REPROJECT_CATMULL_9TAP 0 // HistorySample built on Get2DCatmullRom9Kernel
+#define FFXM_FSR2_REPROJECT_LANCZOS_APPROX_9TAP 1 // never selected by the #if chain below
+#define FFXM_FSR2_REPROJECT_CATMULL_5TAP 2 // HistorySample built on GetBicubic2DCatmullRomSamples
+// Select the history reprojection kernel from the shader quality options.
+#if FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_5TAP
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_5TAP
+#elif FFXM_SHADER_QUALITY_OPT_REPROJECT_CATMULL_9TAP
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_9TAP
+#else // QUALITY: neither option is set, fall back to the 9-tap Catmull-Rom kernel
+#define FFXM_FSR2_REPROJECT_MODE FFXM_FSR2_REPROJECT_CATMULL_9TAP
+#endif
+
+#if (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_CATMULL_9TAP)
+struct CatmullRomSamples9Tap
+{
+ // Per-axis UVs of the bilinear taps: [0] = one texel before the base texel, [1] = two texels after it, [2] = merged middle tap
+ FfxFloat32x2 UV[3];
+
+ // Per-axis weights: [0] = w0, [1] = w1 + w2 (merged middle tap, pairs with UV[2]), [2] = w3 (pairs with UV[1])
+ FFXM_MIN16_F2 Weight[3];
+
+ // Final multiplier. NOTE(review): the 9-tap HistorySample in this file never reads this field.
+ FFXM_MIN16_F FinalMultiplier;
+};
+
+// Builds the per-axis taps and weights of a 9-tap (3x3 bilinear fetches) Catmull-Rom filter around `uv`; `size`/`invSize` are the texture size and its reciprocal.
+CatmullRomSamples9Tap Get2DCatmullRom9Kernel(FfxFloat32x2 uv, FfxFloat32x2 size, in FfxFloat32x2 invSize)
+{
+    CatmullRomSamples9Tap catmullSamples;
+    FfxFloat32x2 samplePos = uv * size;
+    // Centre of the texel at or just before the sample position, and the fractional offset from it.
+    FfxFloat32x2 texPos1 = floor(samplePos - 0.5f) + 0.5f;
+    FfxFloat32x2 f = samplePos - texPos1;
+
+    // Catmull-Rom weights of the four texels along each axis; per axis they sum to 1.
+    FfxFloat32x2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
+    FfxFloat32x2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
+    FfxFloat32x2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
+    FfxFloat32x2 w3 = f * f * (-0.5f + 0.5f * f);
+
+    // The two middle texels are fetched as a single bilinear tap, so their weights are merged.
+    catmullSamples.Weight[0] = FFXM_MIN16_F2(w0);
+    catmullSamples.Weight[1] = FFXM_MIN16_F2(w1 + w2);
+    catmullSamples.Weight[2] = FFXM_MIN16_F2(w3);
+    // All nine taps are kept, so no renormalisation is needed; set explicitly so the returned struct is fully initialised.
+    catmullSamples.FinalMultiplier = FFXM_MIN16_F(1.0f);
+    FfxFloat32x2 offset12 = w2 / (w1 + w2);
+    // Tap positions in texels, scaled to UV. Index order differs from Weight[]: UV[1] is the far tap (Weight[2]), UV[2] the merged middle tap (Weight[1]).
+    catmullSamples.UV[0] = FfxFloat32x2(texPos1 - 1) * invSize;
+    catmullSamples.UV[1] = FfxFloat32x2(texPos1 + 2) * invSize;
+    catmullSamples.UV[2] = FfxFloat32x2(texPos1 + offset12) * invSize;
+    return catmullSamples;
+}
+
+// Samples the upscaled history at fUvSample with the 9-tap Catmull-Rom kernel; iTextureSize is the history texture size in texels.
+FFXM_MIN16_F4 HistorySample(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)
+{
+    FfxFloat32x2 fTextureSize = FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 fInvTextureSize = FfxFloat32x2(1.0f, 1.0f) / fTextureSize;
+    CatmullRomSamples9Tap samples = Get2DCatmullRom9Kernel(fUvSample, fTextureSize, fInvTextureSize);
+
+    FFXM_MIN16_F4 fColor = FFXM_MIN16_F4(0.0f, 0.0f, 0.0f, 0.0f);
+    // Row at UV[0].y (vertical weight Weight[0].y).
+    FFXM_MIN16_F4 fColor00 = SampleHistory(FfxFloat32x2(samples.UV[0]));
+    fColor += fColor00 * samples.Weight[0].x * samples.Weight[0].y;
+    FFXM_MIN16_F4 fColor20 = SampleHistory(FfxFloat32x2(samples.UV[2].x, samples.UV[0].y));
+    fColor += fColor20 * samples.Weight[1].x * samples.Weight[0].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1].x, samples.UV[0].y)) * samples.Weight[2].x * samples.Weight[0].y;
+    // Merged middle row at UV[2].y (vertical weight Weight[1].y); fColor02 is reused rather than fetched a second time.
+    FFXM_MIN16_F4 fColor02 = SampleHistory(FfxFloat32x2(samples.UV[0].x, samples.UV[2].y));
+    fColor += fColor02 * samples.Weight[0].x * samples.Weight[1].y;
+    FFXM_MIN16_F4 fColor22 = SampleHistory(FfxFloat32x2(samples.UV[2]));
+    fColor += fColor22 * samples.Weight[1].x * samples.Weight[1].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1].x, samples.UV[2].y)) * samples.Weight[2].x * samples.Weight[1].y;
+    // Row at UV[1].y (vertical weight Weight[2].y).
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[0].x, samples.UV[1].y)) * samples.Weight[0].x * samples.Weight[2].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[2].x, samples.UV[1].y)) * samples.Weight[1].x * samples.Weight[2].y;
+    fColor += SampleHistory(FfxFloat32x2(samples.UV[1])) * samples.Weight[2].x * samples.Weight[2].y;
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    const FFXM_MIN16_F4 fDeringingSamples[4] = {fColor00, fColor20, fColor02, fColor22}; // de-ringing: the result is clamped to the min/max of these four taps
+
+    FFXM_MIN16_F4 fDeringingMin = fDeringingSamples[0];
+    FFXM_MIN16_F4 fDeringingMax = fDeringingSamples[0];
+
+    FFXM_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < 4; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+    fColor = clamp(fColor, fDeringingMin, fDeringingMax);
+#endif
+    return fColor;
+}
+#elif (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_CATMULL_5TAP)
+#define ARM_CATMULL_5TAP_SAMPLE_COUNT 5 // number of bilinear taps kept by the 5-tap kernel
+struct CatmullRomSamples
+{
+ // bilinear sampling UV coordinates of the samples
+ FfxFloat32x2 UV[ARM_CATMULL_5TAP_SAMPLE_COUNT];
+ // 2D weights of the samples (products of per-axis weights, see GetBicubic2DCatmullRomSamples)
+ FFXM_MIN16_F Weight[ARM_CATMULL_5TAP_SAMPLE_COUNT];
+ // final multiplier: reciprocal of the sum of the five weights (one multiply on the result is cheaper than rescaling every weight)
+ FFXM_MIN16_F FinalMultiplier;
+};
+
+// Per-axis Catmull-Rom taps for `uv`: three UV positions and three weights, with the two middle texels folded into one bilinear tap.
+void Bicubic2DCatmullRom(in FfxFloat32x2 uv, in FfxFloat32x2 size, in FfxFloat32x2 invSize, FFXM_PARAMETER_OUT FfxFloat32x2 samples[3], FFXM_PARAMETER_OUT FfxFloat32x2 weights[3])
+{
+    FfxFloat32x2 texelPos = uv * size;
+    FfxFloat32x2 baseCentre = floor(texelPos - 0.5) + 0.5;
+    FfxFloat32x2 t = texelPos - baseCentre;
+    FfxFloat32x2 t2 = t * t;
+    FfxFloat32x2 t3 = t2 * t;
+    // Weights of the four texels per axis; w2 is derived so that the four sum to 1.
+    FfxFloat32x2 w0 = t2 - 0.5 * (t3 + t);
+    FfxFloat32x2 w1 = 1.5 * t3 - 2.5 * t2 + 1.f;
+    FfxFloat32x2 w3 = 0.5 * (t3 - t2);
+    FfxFloat32x2 w2 = 1.f - w0 - w1 - w3;
+    FfxFloat32x2 wMiddle = w1 + w2;
+
+    // Tap positions in texels, scaled to UV: one texel before, merged middle, two texels after.
+    samples[0] = (baseCentre - 1.f) * invSize;
+    samples[1] = (baseCentre + w2 / wMiddle) * invSize;
+    samples[2] = (baseCentre + 2.f) * invSize;
+    weights[0] = w0;
+    weights[1] = wMiddle;
+    weights[2] = w3;
+}
+
+// Builds the 5-tap Catmull-Rom kernel: the 3x3 bilinear-tap kernel from Bicubic2DCatmullRom with its four corner taps dropped.
+// FinalMultiplier is set to the reciprocal of the sum of the five kept weights.
+CatmullRomSamples GetBicubic2DCatmullRomSamples(FfxFloat32x2 uv, FfxFloat32x2 size, in FfxFloat32x2 invSize)
+{
+    FfxFloat32x2 axisWeights[3];
+    FfxFloat32x2 axisUVs[3];
+    Bicubic2DCatmullRom(uv, size, invSize, axisUVs, axisWeights);
+
+    CatmullRomSamples kernel;
+    // Plus-shaped footprint: the middle tap of each axis combined with every tap of the other axis.
+    kernel.UV[0] = FfxFloat32x2(axisUVs[1].x, axisUVs[0].y);
+    kernel.UV[1] = FfxFloat32x2(axisUVs[0].x, axisUVs[1].y);
+    kernel.UV[2] = FfxFloat32x2(axisUVs[1].x, axisUVs[1].y);
+    kernel.UV[3] = FfxFloat32x2(axisUVs[2].x, axisUVs[1].y);
+    kernel.UV[4] = FfxFloat32x2(axisUVs[1].x, axisUVs[2].y);
+
+    // Each 2D weight is the product of the matching per-axis weights.
+    kernel.Weight[0] = FFXM_MIN16_F(axisWeights[1].x * axisWeights[0].y);
+    kernel.Weight[1] = FFXM_MIN16_F(axisWeights[0].x * axisWeights[1].y);
+    kernel.Weight[2] = FFXM_MIN16_F(axisWeights[1].x * axisWeights[1].y);
+    kernel.Weight[3] = FFXM_MIN16_F(axisWeights[2].x * axisWeights[1].y);
+    kernel.Weight[4] = FFXM_MIN16_F(axisWeights[1].x * axisWeights[2].y);
+
+    // The dropped corners carried part of the total weight, so record the factor that rescales the kept taps.
+    FFXM_MIN16_F keptWeightSum = kernel.Weight[0] + kernel.Weight[1] + kernel.Weight[2]
+                               + kernel.Weight[3] + kernel.Weight[4];
+    kernel.FinalMultiplier = FFXM_MIN16_F(1.f / keptWeightSum);
+
+    return kernel;
+}
+
+// Samples the upscaled history at fUvSample with the 5-tap Catmull-Rom kernel and renormalises the result by the kernel's FinalMultiplier.
+FFXM_MIN16_F4 HistorySample(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)
+{
+    FfxFloat32x2 fTextureSize = FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 fInvTextureSize = FfxFloat32x2(1.0f, 1.0f) / fTextureSize;
+    CatmullRomSamples samples = GetBicubic2DCatmullRomSamples(fUvSample, fTextureSize, fInvTextureSize);
+    FFXM_MIN16_F4 fColor = SampleHistory(FfxFloat32x2(samples.UV[0])) * samples.Weight[0];
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    FFXM_MIN16_F4 fDeringingMin = fColor; // NOTE(review): de-ringing bounds are built from the weighted taps, unchanged by this fix
+    FFXM_MIN16_F4 fDeringingMax = fColor;
+#endif
+    for(FfxInt32 iSampleIndex = 1; iSampleIndex < ARM_CATMULL_5TAP_SAMPLE_COUNT; iSampleIndex++)
+    {
+        FFXM_MIN16_F4 fSample = SampleHistory(FfxFloat32x2(samples.UV[iSampleIndex])) * samples.Weight[iSampleIndex];
+        fColor += fSample;
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+        fDeringingMin = ffxMin(fDeringingMin, fSample);
+        fDeringingMax = ffxMax(fDeringingMax, fSample);
+#endif
+    }
+    // The five kept weights do not sum to 1 once the corners are dropped; rescale by the factor the kernel computed for that purpose.
+    fColor *= samples.FinalMultiplier;
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    fColor = clamp(fColor, fDeringingMin, fDeringingMax);
+#endif
+    return fColor;
+}
+#elif (FFXM_FSR2_REPROJECT_MODE == FFXM_FSR2_REPROJECT_LANCZOS_APPROX_9TAP)
+
+// Fetches the 3x3 history neighbourhood around iPxSample: one gather for a 2x2 quad, then five individual loads through WrapHistory.
+Fetched9TapSamplesMin16 FetchHistorySamples(FfxInt32x2 iPxSample, FfxInt32x2 iTextureSize)
+{
+    Fetched9TapSamplesMin16 Samples;
+    FfxFloat32x2 iSrcInputUv = FfxFloat32x2(iPxSample) / FfxFloat32x2(iTextureSize);
+    FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(iTextureSize);
+    // Gather half a texel before iPxSample on both axes; presumably fills the quad at offsets -1..0 (GatherHistoryColorRGBQuad is declared elsewhere).
+    GatherHistoryColorRGBQuad(FfxFloat32x2(-0.5, -0.5) * unitOffsetUv + iSrcInputUv,
+                              Samples.fColor00, Samples.fColor10, Samples.fColor01, Samples.fColor11);
+    // Remaining texels: offsets are added in integer space so WrapHistory receives integer pixel coordinates, not floats.
+    Samples.fColor20 = WrapHistory(FFXM_MIN16_I2(iPxSample + FfxInt32x2(1, -1)));
+    Samples.fColor21 = WrapHistory(FFXM_MIN16_I2(iPxSample + FfxInt32x2(1, 0)));
+    Samples.fColor02 = WrapHistory(FFXM_MIN16_I2(iPxSample + FfxInt32x2(-1, 1)));
+    Samples.fColor12 = WrapHistory(FFXM_MIN16_I2(iPxSample + FfxInt32x2(0, 1)));
+    Samples.fColor22 = WrapHistory(FFXM_MIN16_I2(iPxSample + FfxInt32x2(1, 1)));
+    return Samples;
+}
+//DeclareCustomFetch9TapSamplesMin16(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSampleMin16(HistorySample, Lanczos2Approx, FetchHistorySamples)
+#endif // FFXM_FSR2_REPROJECT_MODE
+
+#else // !FFXM_HALF
+
+#ifndef FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference; default used only when the option was not defined earlier
+#endif
+DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) // fp32 path: presumably generates FetchHistorySamples on top of WrapHistory (macro defined outside this chunk)
+DeclareCustomTextureSample(HistorySample, FFXM_FSR2_GET_LANCZOS_SAMPLER1D(FFXM_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) // presumably generates the fp32 HistorySample
+#endif
+
+// Loads the lock status at integer pixel iPxSample and widens it to four components
+// by appending two zeros, so it can be fed to the generic four-component sample helpers.
+FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
+{
+    return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
+}
+
+#if FFXM_HALF
+// Reduced-precision overload of the above.
+FFXM_MIN16_F4 WrapLockStatus(FFXM_MIN16_I2 iPxSample)
+{
+    return FFXM_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
+}
+#endif
+
+#if FFXM_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus) // presumably generates FetchLockStatusSamples on top of WrapLockStatus (macro defined outside this chunk)
+DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples) // presumably generates LockStatusSample using the Bilinear filter
+#else
+DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus) // fp32 counterpart of the fetch helper above
+DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples) // fp32 counterpart of the sampler above
+#endif
+
+// Returns the motion vector for an output pixel, given both its integer position (iPxHrPos) and its UV (fHrUv).
+FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
+{
+#if FFXM_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    // Low-resolution vectors: read the dilated vector at the render-size pixel matching fHrUv.
+    return LoadDilatedMotionVector(FfxInt32x2(fHrUv * RenderSize()));
+#else
+    return LoadInputMotionVector(iPxHrPos);
+#endif
+}
+
+FfxBoolean IsUvInside(FfxFloat32x2 fUv) // true when fUv lies in the closed unit square [0, 1] x [0, 1]
+{
+    return (0.0f <= fUv.x && fUv.x <= 1.0f) && (0.0f <= fUv.y && fUv.y <= 1.0f);
+}
+
+void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFXM_PARAMETER_OUT FfxBoolean bIsExistingSample) // offsets the pixel UV by its motion vector and reports whether the result is still inside [0, 1]^2
+{
+    FfxFloat32x2 fOffsetUv = params.fHrUv + params.fMotionVector;
+    fReprojectedHrUv = fOffsetUv;
+    bIsExistingSample = IsUvInside(fOffsetUv);
+}
+
+#if !FFXM_HALF
+// fp32 path: fetches the reprojected history and splits it into colour, temporal reactive factor and last-frame motion flag.
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFXM_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFXM_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
+{
+    FfxFloat32x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
+    // A negative history alpha flags motion in the previous frame.
+    bInMotionLastFrame = (fReprojectedHistory.w < 0.0f);
+    // Colour: PrepareRgb with the current exposure and previous-frame pre-exposure, then YCoCg unless the tonemapped-RGB option is on.
+    fHistoryColor = PrepareRgb(fReprojectedHistory.rgb, Exposure(), PreviousFramePreExposure());
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = RGBToYCoCg(fHistoryColor);
+#endif
+    // Temporal reactivity: from the separate texture when enabled, otherwise the magnitude of the history alpha; saturated to [0, 1].
+#if FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+    fTemporalReactiveFactor = ffxSaturate(abs(SampleTemporalReactive(params.fReprojectedHrUv)));
+#else
+    fTemporalReactiveFactor = ffxSaturate(abs(fReprojectedHistory.w));
+#endif
+}
+
+// fp32 path: samples the reprojected lock status into fReprojectedLockStatus and returns
+// whether a new lock was requested for this pixel and whether it was locked in the previous frame.
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
+{
+    LockState state = { FFXM_FALSE, FFXM_FALSE };
+    // A new lock is requested when the new-lock intensity exceeds 127/255.
+    const FfxFloat32 fNewLockIntensity = LoadNewLocks(params.iPxHrPos);
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    // Any remaining lifetime in the reprojected status means the pixel was locked last frame.
+    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+    return state;
+}
+#else //FFXM_HALF
+
+// Half-precision path: fetches the reprojected history and splits it into colour, temporal reactive factor and last-frame motion flag.
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat16x3 fHistoryColor, FFXM_PARAMETER_OUT FfxFloat16 fTemporalReactiveFactor, FFXM_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
+{
+    FfxFloat16x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
+    // A negative history alpha flags motion in the previous frame.
+    bInMotionLastFrame = (fReprojectedHistory.w < 0.0f);
+    // Colour: PrepareRgb with the current exposure and previous-frame pre-exposure, then YCoCg unless the tonemapped-RGB option is on.
+    fHistoryColor = FfxFloat16x3(PrepareRgb(fReprojectedHistory.rgb, Exposure(), PreviousFramePreExposure()));
+#if !FFXM_SHADER_QUALITY_OPT_TONEMAPPED_RGB_PREPARED_INPUT_COLOR
+    fHistoryColor = RGBToYCoCg(fHistoryColor);
+#endif
+    // Temporal reactivity: from the separate texture when enabled, otherwise the magnitude of the history alpha; saturated to [0, 1].
+#if FFXM_SHADER_QUALITY_OPT_SEPARATE_TEMPORAL_REACTIVE
+    fTemporalReactiveFactor = FfxFloat16(ffxSaturate(abs(SampleTemporalReactive(params.fReprojectedHrUv))));
+#else
+    fTemporalReactiveFactor = FfxFloat16(ffxSaturate(abs(fReprojectedHistory.w)));
+#endif
+}
+
+// Half-precision path: samples the reprojected lock status and reports the new-lock / locked-last-frame flags for this pixel.
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFXM_PARAMETER_OUT FfxFloat16x2 fReprojectedLockStatus)
+{
+    LockState state = { FFXM_FALSE, FFXM_FALSE };
+    // A new lock is requested when the new-lock intensity exceeds 127/255.
+    const FfxFloat16 fNewLockIntensity = FfxFloat16(LoadNewLocks(params.iPxHrPos));
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    // Any remaining lifetime in the reprojected status means the pixel was locked last frame.
+    fReprojectedLockStatus = FfxFloat16x2(SampleLockStatus(params.fReprojectedHrUv));
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat16(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+    return state;
+}
+
+#endif
+
+#endif //!defined( FFXM_FSR2_REPROJECT_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta
new file mode 100644
index 0000000..81fe61c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_reproject.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: b997a83902840b04fbecef298bd4b620
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h
new file mode 100644
index 0000000..fb1fae9
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h
@@ -0,0 +1,100 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_RESOURCES_H
+#define FFXM_FSR2_RESOURCES_H
+
+#if defined(FFXM_CPU) || defined(FFXM_GPU)
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_NULL 0
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26
+#define FFXM_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 // NOTE(review): "IDENTITIER" (sic) differs from the IDENTIFIER spelling of its siblings; left as-is because users of this name are not visible here
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 45
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 46
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 47
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 48
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 49
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 50
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE 51
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE_1 52
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_INTERNAL_TEMPORAL_REACTIVE_2 53
+
+// Shading change detection mip level setting, value must be in the range [FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
+#define FFXM_FSR2_SHADING_CHANGE_MIP_LEVEL (FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFXM_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE)
+
+#define FFXM_FSR2_RESOURCE_IDENTIFIER_COUNT 54 // one more than the highest identifier defined above (53); keep in sync when adding resources
+
+#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0
+#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1
+#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2
+#define FFXM_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3
+
+#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1
+#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2
+#define FFXM_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4
+#define FFXM_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8
+
+#endif // #if defined(FFXM_CPU) || defined(FFXM_GPU)
+
+#endif //!defined( FFXM_FSR2_RESOURCES_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta
new file mode 100644
index 0000000..8141bdd
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_resources.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 80c7963a01a0e5c4bb69e6b897267a9b
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h
new file mode 100644
index 0000000..8c5408f
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h
@@ -0,0 +1,699 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_SAMPLE_H
+#define FFXM_FSR2_SAMPLE_H
+
+// suppress warnings
+#ifdef FFXM_HLSL
+#pragma warning(disable: 4008) // potentially divide by zero
+#endif //FFXM_HLSL
+
+struct FetchedBilinearSamples {
+// 2x2 footprint; in fColorXY the first digit is the x index and the second the y index (Bilinear blends 00/10 and 01/11 along x)
+ FfxFloat32x4 fColor00;
+ FfxFloat32x4 fColor10;
+// row y = 1
+ FfxFloat32x4 fColor01;
+ FfxFloat32x4 fColor11;
+};
+
+struct FetchedBicubicSamples {
+// 4x4 footprint; in fColorXY the first digit is the x index and the second the y index. Row y = 0:
+ FfxFloat32x4 fColor00;
+ FfxFloat32x4 fColor10;
+ FfxFloat32x4 fColor20;
+ FfxFloat32x4 fColor30;
+// row y = 1
+ FfxFloat32x4 fColor01;
+ FfxFloat32x4 fColor11;
+ FfxFloat32x4 fColor21;
+ FfxFloat32x4 fColor31;
+// row y = 2
+ FfxFloat32x4 fColor02;
+ FfxFloat32x4 fColor12;
+ FfxFloat32x4 fColor22;
+ FfxFloat32x4 fColor32;
+// row y = 3
+ FfxFloat32x4 fColor03;
+ FfxFloat32x4 fColor13;
+ FfxFloat32x4 fColor23;
+ FfxFloat32x4 fColor33;
+};
+
+#if FFXM_HALF
+struct FetchedBilinearSamplesMin16 {
+// Reduced-precision counterpart of FetchedBilinearSamples (same 2x2 layout, fColorXY = x index then y index). Row y = 0:
+ FFXM_MIN16_F4 fColor00;
+ FFXM_MIN16_F4 fColor10;
+// row y = 1
+ FFXM_MIN16_F4 fColor01;
+ FFXM_MIN16_F4 fColor11;
+};
+
+struct FetchedBicubicSamplesMin16 {
+// Reduced-precision counterpart of FetchedBicubicSamples (same 4x4 layout, fColorXY = x index then y index). Row y = 0:
+ FFXM_MIN16_F4 fColor00;
+ FFXM_MIN16_F4 fColor10;
+ FFXM_MIN16_F4 fColor20;
+ FFXM_MIN16_F4 fColor30;
+// row y = 1
+ FFXM_MIN16_F4 fColor01;
+ FFXM_MIN16_F4 fColor11;
+ FFXM_MIN16_F4 fColor21;
+ FFXM_MIN16_F4 fColor31;
+// row y = 2
+ FFXM_MIN16_F4 fColor02;
+ FFXM_MIN16_F4 fColor12;
+ FFXM_MIN16_F4 fColor22;
+ FFXM_MIN16_F4 fColor32;
+// row y = 3
+ FFXM_MIN16_F4 fColor03;
+ FFXM_MIN16_F4 fColor13;
+ FFXM_MIN16_F4 fColor23;
+ FFXM_MIN16_F4 fColor33;
+};
+
+struct Fetched9TapSamplesMin16 {
+// 3x3 footprint in reduced precision (fColorXY = x index then y index); declared only when FFXM_HALF is set, with no fp32 alias. Row y = 0:
+ FFXM_MIN16_F4 fColor00;
+ FFXM_MIN16_F4 fColor10;
+ FFXM_MIN16_F4 fColor20;
+// row y = 1
+ FFXM_MIN16_F4 fColor01;
+ FFXM_MIN16_F4 fColor11;
+ FFXM_MIN16_F4 fColor21;
+// row y = 2
+ FFXM_MIN16_F4 fColor02;
+ FFXM_MIN16_F4 fColor12;
+ FFXM_MIN16_F4 fColor22;
+};
+
+#else //FFXM_HALF
+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples // without FFXM_HALF the Min16 name aliases the fp32 struct
+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples // same aliasing for the 2x2 footprint; Fetched9TapSamplesMin16 has no alias here
+#endif //FFXM_HALF
+
+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) // interpolates from A (t = 0) to B (t = 1); t is not clamped
+{
+    return (B - A) * t + A;
+}
+
+// Bilinear blend of a 2x2 footprint: each row is interpolated along x, then the two rows along y.
+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    return Linear(fRow0, fRow1, fPxFrac.y);
+}
+
+#if FFXM_HALF
+// Reduced-precision Linear: interpolates from A (t = 0) to B (t = 1); t is not clamped.
+FFXM_MIN16_F4 Linear(FFXM_MIN16_F4 A, FFXM_MIN16_F4 B, FFXM_MIN16_F t)
+{
+    return (B - A) * t + A;
+}
+// Reduced-precision Bilinear: each row is interpolated along x, then the two rows along y.
+FFXM_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFXM_MIN16_F2 fPxFrac)
+{
+    FFXM_MIN16_F4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FFXM_MIN16_F4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    return Linear(fRow0, fRow1, fPxFrac.y);
+}
+#endif
+
+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) // Lanczos-2 kernel sinc(x) * sinc(x / 2); x is not clamped to the kernel support here
+{
+ const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
+ return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); // returns 1 near x = 0, where the quotients would divide by ~0
+}
+
+// Lanczos-2 weight of x, with |x| clamped to at most 2 before evaluating the kernel.
+FfxFloat32 Lanczos2(FfxFloat32 x)
+{
+    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
+}
+
+#if FFXM_HALF
+
+// Reduced-precision overloads of the Lanczos-2 kernel.
+// Lanczos2NoClamp: sinc(x) * sinc(x / 2) without clamping x; returns 1 when |x| is below FSR2_EPSILON.
+FFXM_MIN16_F Lanczos2NoClamp(FFXM_MIN16_F x)
+{
+    const FFXM_MIN16_F PI = FFXM_MIN16_F(3.141592653589793f); // TODO: share SDK constants
+    return abs(x) < FFXM_MIN16_F(FSR2_EPSILON) ? FFXM_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFXM_MIN16_F(0.5f) * PI * x) / (FFXM_MIN16_F(0.5f) * PI * x));
+}
+
+// Lanczos2: same kernel with |x| clamped to at most 2; the explicit cast keeps the call on the reduced-precision overload.
+FFXM_MIN16_F Lanczos2(FFXM_MIN16_F x)
+{
+    return FFXM_MIN16_F(Lanczos2NoClamp(FFXM_MIN16_F(ffxMin(abs(x), FFXM_MIN16_F(2.0f)))));
+}
+#endif //FFXM_HALF
+
+// FSR1 polynomial approximation of the Lanczos-2 kernel. Takes x*x (not x); the caller must keep it <= 4.
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
+{
+    FfxFloat32 fBaseArg = (2.0f / 5.0f) * x2 - 1;
+    FfxFloat32 fWindowArg = (1.0f / 4.0f) * x2 - 1;
+    return ((25.0f / 16.0f) * fBaseArg * fBaseArg - (25.0f / 16.0f - 1)) * (fWindowArg * fWindowArg);
+}
+
+#if FFXM_HALF // reduced-precision overload of the FSR1 Lanczos-2 approximation; input is x*x and must be <= 4
+FFXM_MIN16_F Lanczos2ApproxSqNoClamp(FFXM_MIN16_F x2)
+{
+    FFXM_MIN16_F fBaseArg = FFXM_MIN16_F(2.0f / 5.0f) * x2 - FFXM_MIN16_F(1);
+    FFXM_MIN16_F fWindowArg = FFXM_MIN16_F(1.0f / 4.0f) * x2 - FFXM_MIN16_F(1);
+    return (FFXM_MIN16_F(25.0f / 16.0f) * fBaseArg * fBaseArg - FFXM_MIN16_F(25.0f / 16.0f - 1)) * (fWindowArg * fWindowArg);
+}
+#endif //FFXM_HALF
+
+// Approximate Lanczos-2 weight for a squared distance x2, clamped to at most 4 before evaluation.
+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
+{
+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
+}
+
+#if FFXM_HALF
+// Reduced-precision overload; the explicit cast keeps the call on the reduced-precision Lanczos2ApproxSqNoClamp.
+FFXM_MIN16_F Lanczos2ApproxSq(FFXM_MIN16_F x2)
+{
+    return Lanczos2ApproxSqNoClamp(FFXM_MIN16_F(ffxMin(x2, FFXM_MIN16_F(4.0f))));
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) // approximate Lanczos-2 weight of x; squares x and forwards it without clamping
+{
+ return Lanczos2ApproxSqNoClamp(x * x);
+}
+// Reduced-precision overload of the above.
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2ApproxNoClamp(FFXM_MIN16_F x)
+{
+ return Lanczos2ApproxSqNoClamp(x * x);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2Approx(FfxFloat32 x) // approximate Lanczos-2 weight of x; squares x and forwards it to the clamping Lanczos2ApproxSq
+{
+ return Lanczos2ApproxSq(x * x);
+}
+// Reduced-precision overload of the above.
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2Approx(FFXM_MIN16_F x)
+{
+ return Lanczos2ApproxSq(x * x);
+}
+#endif //FFXM_HALF
+
+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) // Lanczos-2 weight looked up via SampleLanczos2Weight (declared elsewhere) at |x|
+{
+ return SampleLanczos2Weight(abs(x));
+}
+// Reduced-precision overload: same lookup, result converted to FFXM_MIN16_F.
+#if FFXM_HALF
+FFXM_MIN16_F Lanczos2_UseLUT(FFXM_MIN16_F x)
+{
+ return FFXM_MIN16_F(SampleLanczos2Weight(abs(x)));
+}
+#endif //FFXM_HALF
+
+// 4-tap filter: LUT-based Lanczos-2 weights for taps at offsets -1, 0, +1, +2 relative to t, normalised by their sum.
+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+    FfxFloat32x4 fTapWeights = FfxFloat32x4(Lanczos2_UseLUT(-1.f - t), Lanczos2_UseLUT(-0.f - t), Lanczos2_UseLUT(+1.f - t), Lanczos2_UseLUT(+2.f - t));
+    FfxFloat32x4 fWeightedSum = fTapWeights.x * fColor0 + fTapWeights.y * fColor1 + fTapWeights.z * fColor2 + fTapWeights.w * fColor3;
+    FfxFloat32 fWeightTotal = fTapWeights.x + fTapWeights.y + fTapWeights.z + fTapWeights.w;
+    return fWeightedSum / fWeightTotal;
+}
+#if FFXM_HALF
+FFXM_MIN16_F4 Lanczos2_UseLUT(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, FFXM_MIN16_F t)
+{
+    FFXM_MIN16_F fTapM1 = Lanczos2_UseLUT(FFXM_MIN16_F(-1.f) - t); // weight for fColor0 (tap at offset -1)
+    FFXM_MIN16_F fTap0  = Lanczos2_UseLUT(FFXM_MIN16_F(-0.f) - t); // weight for fColor1 (tap at offset 0)
+    FFXM_MIN16_F fTapP1 = Lanczos2_UseLUT(FFXM_MIN16_F(+1.f) - t); // weight for fColor2 (tap at offset +1)
+    FFXM_MIN16_F fTapP2 = Lanczos2_UseLUT(FFXM_MIN16_F(+2.f) - t); // weight for fColor3 (tap at offset +2)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2 + fTapP2 * fColor3) / (fTapM1 + fTap0 + fTapP1 + fTapP2); // weighted sum divided by total weight
+}
+#endif
+
+FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+    FfxFloat32 fTapM1 = Lanczos2(-1.f - t); // weight for fColor0 (tap at offset -1)
+    FfxFloat32 fTap0  = Lanczos2(-0.f - t); // weight for fColor1 (tap at offset 0)
+    FfxFloat32 fTapP1 = Lanczos2(+1.f - t); // weight for fColor2 (tap at offset +1)
+    FfxFloat32 fTapP2 = Lanczos2(+2.f - t); // weight for fColor3 (tap at offset +2)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2 + fTapP2 * fColor3) / (fTapM1 + fTap0 + fTapP1 + fTapP2); // weighted sum divided by total weight
+}
+
+// Separable Lanczos-2 reconstruction over a 4x4 texel footprint.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FfxFloat32x4 fTexel11 = Samples.fColor11;
+    const FfxFloat32x4 fTexel21 = Samples.fColor21;
+    const FfxFloat32x4 fTexel12 = Samples.fColor12;
+    const FfxFloat32x4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F4 Lanczos2(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, FFXM_MIN16_F t)
+{
+    FFXM_MIN16_F fTapM1 = Lanczos2(FFXM_MIN16_F(-1.f) - t); // weight for fColor0 (tap at offset -1)
+    FFXM_MIN16_F fTap0  = Lanczos2(FFXM_MIN16_F(-0.f) - t); // weight for fColor1 (tap at offset 0)
+    FFXM_MIN16_F fTapP1 = Lanczos2(FFXM_MIN16_F(+1.f) - t); // weight for fColor2 (tap at offset +1)
+    FFXM_MIN16_F fTapP2 = Lanczos2(FFXM_MIN16_F(+2.f) - t); // weight for fColor3 (tap at offset +2)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2 + fTapP2 * fColor3) / (fTapM1 + fTap0 + fTapP1 + fTapP2); // weighted sum divided by total weight
+}
+
+// FFXM_MIN16_F4 version of the separable Lanczos-2 reconstruction over a 4x4 footprint.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FFXM_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FFXM_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFXM_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFXM_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFXM_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FFXM_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FFXM_MIN16_F4 fTexel11 = Samples.fColor11;
+    const FFXM_MIN16_F4 fTexel21 = Samples.fColor21;
+    const FFXM_MIN16_F4 fTexel12 = Samples.fColor12;
+    const FFXM_MIN16_F4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FFXM_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FFXM_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+
+FFXM_MIN16_F4 Lanczos2(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F t)
+{
+    FFXM_MIN16_F fTapM1 = Lanczos2(FFXM_MIN16_F(-1.f) - t); // weight for fColor0 (tap at offset -1)
+    FFXM_MIN16_F fTap0  = Lanczos2(FFXM_MIN16_F(-0.f) - t); // weight for fColor1 (tap at offset 0)
+    FFXM_MIN16_F fTapP1 = Lanczos2(FFXM_MIN16_F(+1.f) - t); // weight for fColor2 (tap at offset +1)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2) / (fTapM1 + fTap0 + fTapP1); // 3-tap weighted sum divided by total weight
+}
+
+FFXM_MIN16_F4 Lanczos2Approx(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F t)
+{
+    FFXM_MIN16_F fTapM1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-1.f) - t); // weight for fColor0 (tap at offset -1)
+    FFXM_MIN16_F fTap0  = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-0.f) - t); // weight for fColor1 (tap at offset 0)
+    FFXM_MIN16_F fTapP1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+1.f) - t); // weight for fColor2 (tap at offset +1)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2) / (fTapM1 + fTap0 + fTapP1); // 3-tap weighted sum divided by total weight
+}
+
+// Separable approximate-Lanczos reconstruction over a 3x3 texel footprint (FFXM_MIN16_F4).
+// Each footprint row is first filtered along x with fPxFrac.x, then the three row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the texels
+// fColor11, fColor21, fColor12 and fColor22.
+FFXM_MIN16_F4 Lanczos2Approx(Fetched9TapSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac)
+{
+    // Horizontal pass: one 3-tap filter per footprint row.
+    FFXM_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, fPxFrac.x);
+    FFXM_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, fPxFrac.x);
+    FFXM_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, fPxFrac.x);
+
+    // Vertical pass over the three row results.
+    FFXM_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+    const FFXM_MIN16_F4 fTexel11 = Samples.fColor11;
+    const FFXM_MIN16_F4 fTexel21 = Samples.fColor21;
+    const FFXM_MIN16_F4 fTexel12 = Samples.fColor12;
+    const FFXM_MIN16_F4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FFXM_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FFXM_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+
+#endif //FFXM_HALF
+
+
+// Separable Lanczos-2 reconstruction over a 4x4 footprint using Lanczos2_UseLUT weights.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FfxFloat32x4 fTexel11 = Samples.fColor11;
+    const FfxFloat32x4 fTexel21 = Samples.fColor21;
+    const FfxFloat32x4 fTexel12 = Samples.fColor12;
+    const FfxFloat32x4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+
+#if FFXM_HALF
+// FFXM_MIN16_F4 version of the 4x4 separable reconstruction using Lanczos2_UseLUT weights.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FFXM_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FFXM_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFXM_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFXM_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFXM_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FFXM_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FFXM_MIN16_F4 fTexel11 = Samples.fColor11;
+    const FFXM_MIN16_F4 fTexel21 = Samples.fColor21;
+    const FFXM_MIN16_F4 fTexel12 = Samples.fColor12;
+    const FFXM_MIN16_F4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FFXM_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FFXM_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+#endif //FFXM_HALF
+
+
+
+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{
+    FfxFloat32 fTapM1 = Lanczos2ApproxNoClamp(-1.f - t); // weight for fColor0; unclamped, stays within |x| <= 2 while t is in [0, 1]
+    FfxFloat32 fTap0  = Lanczos2ApproxNoClamp(-0.f - t); // weight for fColor1 (tap at offset 0)
+    FfxFloat32 fTapP1 = Lanczos2ApproxNoClamp(+1.f - t); // weight for fColor2 (tap at offset +1)
+    FfxFloat32 fTapP2 = Lanczos2ApproxNoClamp(+2.f - t); // weight for fColor3 (tap at offset +2)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2 + fTapP2 * fColor3) / (fTapM1 + fTap0 + fTapP1 + fTapP2); // weighted sum divided by total weight
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F4 Lanczos2Approx(FFXM_MIN16_F4 fColor0, FFXM_MIN16_F4 fColor1, FFXM_MIN16_F4 fColor2, FFXM_MIN16_F4 fColor3, FFXM_MIN16_F t)
+{
+    FFXM_MIN16_F fTapM1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-1.f) - t); // weight for fColor0; unclamped, stays within |x| <= 2 while t is in [0, 1]
+    FFXM_MIN16_F fTap0  = Lanczos2ApproxNoClamp(FFXM_MIN16_F(-0.f) - t); // weight for fColor1 (tap at offset 0)
+    FFXM_MIN16_F fTapP1 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+1.f) - t); // weight for fColor2 (tap at offset +1)
+    FFXM_MIN16_F fTapP2 = Lanczos2ApproxNoClamp(FFXM_MIN16_F(+2.f) - t); // weight for fColor3 (tap at offset +2)
+    return (fTapM1 * fColor0 + fTap0 * fColor1 + fTapP1 * fColor2 + fTapP2 * fColor3) / (fTapM1 + fTap0 + fTapP1 + fTapP2); // weighted sum divided by total weight
+}
+#endif //FFXM_HALF
+
+// Separable approximate-Lanczos reconstruction over a 4x4 texel footprint.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FfxFloat32x4 fTexel11 = Samples.fColor11;
+    const FfxFloat32x4 fTexel21 = Samples.fColor21;
+    const FfxFloat32x4 fTexel12 = Samples.fColor12;
+    const FfxFloat32x4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FfxFloat32x4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FfxFloat32x4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+
+#if FFXM_HALF
+// FFXM_MIN16_F4 version of the approximate-Lanczos reconstruction over a 4x4 footprint.
+// Each footprint row is first filtered along x with fPxFrac.x, then the four row results
+// are filtered along y with fPxFrac.y. Unless FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING is
+// set, the result is finally clamped to the component-wise range of the inner 2x2 texels
+// (fColor11, fColor21, fColor12, fColor22).
+FFXM_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFXM_MIN16_F2 fPxFrac)
+{
+    // Horizontal pass: one 4-tap filter per footprint row.
+    FFXM_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFXM_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFXM_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFXM_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+
+    // Vertical pass over the four row results.
+    FFXM_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+#if !FFXM_SHADER_QUALITY_OPT_DISABLE_DERINGING
+    // Deringing
+
+    // TODO: only use 4 by checking jitter
+    const FFXM_MIN16_F4 fTexel11 = Samples.fColor11;
+    const FFXM_MIN16_F4 fTexel21 = Samples.fColor21;
+    const FFXM_MIN16_F4 fTexel12 = Samples.fColor12;
+    const FFXM_MIN16_F4 fTexel22 = Samples.fColor22;
+
+    // Fold the texels left to right, i.e. in the order 11, 21, 12, 22.
+    const FFXM_MIN16_F4 fLowerBound = ffxMin(ffxMin(ffxMin(fTexel11, fTexel21), fTexel12), fTexel22);
+    const FFXM_MIN16_F4 fUpperBound = ffxMax(ffxMax(ffxMax(fTexel11, fTexel21), fTexel12), fTexel22);
+
+    fFiltered = clamp(fFiltered, fLowerBound, fUpperBound);
+#endif
+
+    return fFiltered;
+}
+#endif
+
+// Offsets iPxSample by iPxOffset and clamps only on the side the offset points to. Assumes iPxSample is already in range and iPxOffset is a compile-time constant.
+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;
+    if (iPxOffset.x < 0) { iPxClamped.x = ffxMax(iPxClamped.x, 0); }                  // moved left: guard the lower edge
+    if (iPxOffset.x > 0) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - 1); } // moved right: guard the upper edge
+    if (iPxOffset.y < 0) { iPxClamped.y = ffxMax(iPxClamped.y, 0); }                  // moved towards y = 0
+    if (iPxOffset.y > 0) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - 1); } // moved towards the last row
+    return iPxClamped;
+}
+#if FFXM_HALF
+FFXM_MIN16_I2 ClampCoord(FFXM_MIN16_I2 iPxSample, FFXM_MIN16_I2 iPxOffset, FFXM_MIN16_I2 iTextureSize)
+{
+    FFXM_MIN16_I2 iPxClamped = iPxSample + iPxOffset; // FFXM_MIN16_I2 overload; clamps only on the side the offset points to
+    if (iPxOffset.x < FFXM_MIN16_I(0)) { iPxClamped.x = ffxMax(iPxClamped.x, FFXM_MIN16_I(0)); }
+    if (iPxOffset.x > FFXM_MIN16_I(0)) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - FFXM_MIN16_I(1)); }
+    if (iPxOffset.y < FFXM_MIN16_I(0)) { iPxClamped.y = ffxMax(iPxClamped.y, FFXM_MIN16_I(0)); }
+    if (iPxOffset.y > FFXM_MIN16_I(0)) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - FFXM_MIN16_I(1)); }
+    return iPxClamped;
+}
+#endif //FFXM_HALF
+
+
+#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
+ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
+ { \
+ SampleType Samples; \
+ /* fColorXY = TextureType(LoadTexture(...)) at iPxSample + (X - 1, Y - 1), edge-clamped by ClampCoord. Row at y offset -1: */ \
+ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
+ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
+ Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
+ Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \
+ /* Row at y offset +0: */ \
+ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
+ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
+ Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
+ Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \
+ /* Row at y offset +1: */ \
+ Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
+ Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
+ Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
+ Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \
+ /* Row at y offset +2: */ \
+ Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \
+ Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \
+ Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \
+ Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \
+ /* All 16 taps of the 4x4 footprint have been loaded. */ \
+ return Samples; \
+ }
+
+#define DeclareCustomFetch9TapSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \
+ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
+ { \
+ SampleType Samples; \
+ /* fColorXY = TextureType(LoadTexture(...)) at iPxSample + (X - 1, Y - 1), edge-clamped by ClampCoord. Row at y offset -1: */ \
+ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \
+ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \
+ Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \
+ /* Row at y offset +0: */ \
+ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \
+ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
+ Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
+ /* Row at y offset +1: */ \
+ Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \
+ Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
+ Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
+ /* All 9 taps of the 3x3 footprint have been loaded. */ \
+ return Samples; \
+ }
+
+#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) /* 4x4 fetch into FetchedBicubicSamples, FfxFloat32x4 texels, FfxInt32x2 coordinates */ \
+ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) /* 4x4 fetch into FetchedBicubicSamplesMin16, FFXM_MIN16_F4 texels */ \
+ DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetch9TapSamplesMin16(Name, LoadTexture) /* 3x3 fetch into Fetched9TapSamplesMin16, FFXM_MIN16_F4 texels */ \
+ DeclareCustomFetch9TapSamplesWithType(Fetched9TapSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \
+ SampleType Name(AddrType iPxSample, AddrType iTextureSize) \
+ { \
+ SampleType Samples; /* 2x2 footprint: fColorXY = TextureType(LoadTexture(...)) at iPxSample + (X, Y), edge-clamped by ClampCoord */ \
+ Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \
+ Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \
+ Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \
+ Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \
+ return Samples; \
+ }
+
+#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) /* 2x2 fetch into FetchedBilinearSamples, FfxFloat32x4 texels */ \
+ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) /* 2x2 fetch into FetchedBilinearSamplesMin16, FFXM_MIN16_F4 texels */ \
+ DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFXM_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+// BE CAREFUL: precision issues are common here, e.g. (3253, 125) turning into (3252.9989778, 125.001102),
+// so iPxSample can "jitter". Declares Name(fUvSample, iTextureSize) = InterpolateSamples(FetchSamples(...), frac).
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \
+ FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
+ { \
+ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
+ /* Clamp base coords to [0, iTextureSize] per axis */ \
+ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
+ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
+ /* NOTE(review): the upper bound is iTextureSize, not iTextureSize - 1, so floor() can yield iTextureSize when fUvSample > 1 - confirm FetchSamples tolerates that */ \
+ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
+ FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \
+ FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
+ return fColorXY; \
+ }
+
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \
+ FFXM_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \
+ { \
+ FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \
+ /* Clamp base coords to [0, iTextureSize] per axis; coordinates stay FfxFloat32, only fraction and colour are FFXM_MIN16 */ \
+ fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \
+ fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \
+ /* NOTE(review): the upper bound is iTextureSize, not iTextureSize - 1, so floor() can yield iTextureSize when fUvSample > 1 - confirm FetchSamples tolerates that */ \
+ FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \
+ FFXM_MIN16_F2 fPxFrac = FFXM_MIN16_F2(ffxFract(fPxSample)); \
+ FFXM_MIN16_F4 fColorXY = FFXM_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \
+ return fColorXY; \
+ }
+
+#define FFXM_FSR2_CONCAT_ID(x, y) x ## y
+#define FFXM_FSR2_CONCAT(x, y) FFXM_FSR2_CONCAT_ID(x, y)
+#define FFXM_FSR2_SAMPLER_1D_0 Lanczos2
+#define FFXM_FSR2_SAMPLER_1D_1 Lanczos2LUT
+#define FFXM_FSR2_SAMPLER_1D_2 Lanczos2Approx
+// Maps x (0, 1 or 2) to Lanczos2, Lanczos2LUT or Lanczos2Approx; the two-level CONCAT expands x before token pasting.
+#define FFXM_FSR2_GET_LANCZOS_SAMPLER1D(x) FFXM_FSR2_CONCAT(FFXM_FSR2_SAMPLER_1D_, x)
+
+#endif //!defined( FFXM_FSR2_SAMPLE_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta
new file mode 100644
index 0000000..5b81ce7
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_sample.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: 161ce220c1b38aa41992c3c6e1099300
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h
new file mode 100644
index 0000000..d1829bf
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h
@@ -0,0 +1,195 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#ifndef FFXM_FSR2_UPSAMPLE_H
+#define FFXM_FSR2_UPSAMPLE_H
+
+#define FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP 0
+#define FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP 1
+// Kernel selection: 5-tap when FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP is non-zero, 9-tap otherwise.
+#if FFXM_SHADER_QUALITY_OPT_UPSCALING_LANCZOS_5TAP
+#define FFXM_FSR2_UPSAMPLE_KERNEL FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP
+FFXM_STATIC const FfxInt32 iLanczos2SampleCount = 5; // NOTE(review): FfxInt32 here but FfxUInt32 in the #else branch - confirm the mismatch is intentional
+#else
+#define FFXM_FSR2_UPSAMPLE_KERNEL FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP
+FFXM_STATIC const FfxUInt32 iLanczos2SampleCount = 16; // array size, not tap count: the 9-tap path indexes samples as col + (row << 2)
+#endif
+
+
+void Deringing(RectificationBox clippingBox, FFXM_PARAMETER_INOUT FfxFloat32x3 fColor)
+{
+ fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); // clamps fColor in place to the box's [aabbMin, aabbMax] range
+}
+#if FFXM_HALF
+void Deringing(RectificationBoxMin16 clippingBox, FFXM_PARAMETER_INOUT FFXM_MIN16_F3 fColor)
+{
+ fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); // FFXM_MIN16_F3 overload: clamps fColor in place to [aabbMin, aabbMax]
+}
+#endif
+
+FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
+{
+    // Scale the sample offset by the kernel weight, then evaluate the clamped approximation on its squared length.
+    const FfxFloat32x2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;
+    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
+}
+
+#if FFXM_HALF
+FFXM_MIN16_F GetUpsampleLanczosWeight(FFXM_MIN16_F2 fSrcSampleOffset, FFXM_MIN16_F fKernelWeight)
+{
+    // Scale the sample offset by the kernel weight, then evaluate the clamped approximation on its squared length.
+    const FFXM_MIN16_F2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;
+    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
+}
+#endif
+
+FfxFloat32 ComputeMaxKernelWeight() {
+    const FfxFloat32 fBias = 1.0f;
+    // Per-axis (1 / DownscaleFactor() - 1); only the x component feeds the weight below.
+    const FfxFloat32x2 fInvScaleMinusOne = FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1);
+    const FfxFloat32 fUnclampedWeight = FfxFloat32(1) + fInvScaleMinusOne.x * FfxFloat32(fBias);
+    return ffxMin(FfxFloat32(1.99f), fUnclampedWeight); // never exceed 1.99
+}
+
+// Upsamples the prepared input colour for output pixel params.iPxHrPos and feeds every visited
+// sample into clippingBox (RectificationBoxAddSample / RectificationBoxComputeVarianceBoxData).
+// Returns xyz = weighted colour sum and w = summed kernel weight. When w exceeds FSR2_EPSILON,
+// xyz is divided by w and clamped to clippingBox, and w is scaled by fUpsampleLanczosWeightScale;
+// otherwise w is forced to 0 and xyz is left as the raw weighted sum.
+// The footprint (3x3 block or 5-tap cross) is picked at compile time by FFXM_FSR2_UPSAMPLE_KERNEL.
+#if FFXM_HALF
+FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
+    FFXM_PARAMETER_INOUT RectificationBoxMin16 clippingBox, FfxFloat32 fReactiveFactor)
+#else
+FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
+    FFXM_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
+#endif
+{
+    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
+    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFXM_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position
+    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position
+    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors...
+    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0
+
+    FfxFloat32x2 iSrcInputUv = FfxFloat32x2(fSrcOutputPos) / FfxFloat32x2(MaxRenderSize());
+    FfxFloat32x2 unitOffsetUv = FfxFloat32x2(1.0f, 1.0f) / FfxFloat32x2(MaxRenderSize());
+
+    FFXM_MIN16_F4 fColorAndWeight = FFXM_MIN16_F4(0.0f, 0.0f, 0.0f, 0.0f);
+    FFXM_MIN16_F2 fBaseSampleOffset = FFXM_MIN16_F2(fSrcUnjitteredPos - fSrcOutputPos);
+
+    // Identify how much of each upsampled color to be used for this frame
+    const FFXM_MIN16_F fKernelReactiveFactor = FFXM_MIN16_F(ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)));
+    const FFXM_MIN16_F fKernelBiasMax = FFXM_MIN16_F(ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor));
+
+    const FFXM_MIN16_F fKernelBiasMin = FFXM_MIN16_F(ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)));
+    const FFXM_MIN16_F fKernelBiasFactor = FFXM_MIN16_F(ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)));
+    const FFXM_MIN16_F fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);
+
+    const FFXM_MIN16_F fRectificationCurveBias = FFXM_MIN16_F(ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)));
+
+#if FFXM_FSR2_UPSAMPLE_KERNEL == FFXM_FSR2_UPSAMPLE_USE_LANCZOS_9_TAP
+    // Offset of the top-left tap of the 3x3 footprint relative to iSrcInputPos.
+    FFXM_MIN16_F2 fOffsetTL;
+    fOffsetTL.x = FFXM_MIN16_F(-1);
+    fOffsetTL.y = FFXM_MIN16_F(-1);
+
+    FFXM_MIN16_F3 fSamples[iLanczos2SampleCount];
+    // Collect samples
+    GatherPreparedInputColorRGBQuad(FfxFloat32x2(-0.5, -0.5) * unitOffsetUv + iSrcInputUv,
+        fSamples[0], fSamples[1], fSamples[4], fSamples[5]);
+    fSamples[2] = LoadPreparedInputColor(FfxInt32x2(1, -1) + iSrcInputPos);
+    fSamples[6] = LoadPreparedInputColor(FfxInt32x2(1, 0) + iSrcInputPos);
+    fSamples[8] = LoadPreparedInputColor(FfxInt32x2(-1, 1) + iSrcInputPos);
+    fSamples[9] = LoadPreparedInputColor(FfxInt32x2(0, 1) + iSrcInputPos);
+    fSamples[10] = LoadPreparedInputColor(FfxInt32x2(1, 1) + iSrcInputPos);
+
+    FFXM_UNROLL
+    for (FfxInt32 row = 0; row < 3; row++)
+    {
+        FFXM_UNROLL
+        for (FfxInt32 col = 0; col < 3; col++)
+        {
+            FfxInt32 iSampleIndex = col + (row << 2); // rows are stored with a stride of 4
+            const FfxInt32x2 sampleColRow = FfxInt32x2(col, row);
+            const FFXM_MIN16_F2 fOffset = fOffsetTL + FFXM_MIN16_F2(sampleColRow);
+            FFXM_MIN16_F2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
+
+            FFXM_MIN16_F fSampleWeight = FFXM_MIN16_F(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
+
+            fColorAndWeight += FFXM_MIN16_F4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);
+
+            // Update rectification box
+            {
+                const FFXM_MIN16_F fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
+                const FFXM_MIN16_F fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);
+
+                const FfxBoolean bInitialSample = (row == 0) && (col == 0);
+                RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
+            }
+        }
+    }
+#elif FFXM_FSR2_UPSAMPLE_KERNEL == FFXM_FSR2_UPSAMPLE_USE_LANCZOS_5_TAP
+
+    FFXM_MIN16_F3 fSamples[iLanczos2SampleCount];
+    // Collect samples
+    FfxInt32x2 rowCol [iLanczos2SampleCount] = {FfxInt32x2(0, -1), FfxInt32x2(-1, 0), FfxInt32x2(0, 0), FfxInt32x2(1, 0), FfxInt32x2(0, 1)};
+    fSamples[0] = LoadPreparedInputColor(rowCol[0] + iSrcInputPos);
+    fSamples[1] = LoadPreparedInputColor(rowCol[1] + iSrcInputPos);
+    fSamples[2] = LoadPreparedInputColor(rowCol[2] + iSrcInputPos);
+    fSamples[3] = LoadPreparedInputColor(rowCol[3] + iSrcInputPos);
+    fSamples[4] = LoadPreparedInputColor(rowCol[4] + iSrcInputPos);
+    FFXM_UNROLL
+    for (FfxInt32 idx = 0; idx < iLanczos2SampleCount; idx++)
+    {
+        const FfxInt32x2 sampleColRow = rowCol[idx];
+        const FFXM_MIN16_F2 fOffset = FFXM_MIN16_F2(sampleColRow);
+        FFXM_MIN16_F2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
+
+        FFXM_MIN16_F fSampleWeight = FFXM_MIN16_F(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
+
+        fColorAndWeight += FFXM_MIN16_F4(fSamples[idx] * fSampleWeight, fSampleWeight);
+
+        // Update rectification box
+        {
+            const FFXM_MIN16_F fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
+            const FFXM_MIN16_F fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);
+
+            const FfxBoolean bInitialSample = (idx == 0);
+            RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[idx], fBoxSampleWeight);
+        }
+    }
+#endif
+
+    RectificationBoxComputeVarianceBoxData(clippingBox);
+
+    fColorAndWeight.w *= FFXM_MIN16_F(fColorAndWeight.w > FSR2_EPSILON); // zero out weights at or below FSR2_EPSILON
+
+    if (fColorAndWeight.w > FSR2_EPSILON) {
+        // Normalize for deringing (we need to compare colors)
+        fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
+        fColorAndWeight.w = FFXM_MIN16_F(fColorAndWeight.w*fUpsampleLanczosWeightScale);
+
+        Deringing(clippingBox, fColorAndWeight.xyz);
+    }
+    return fColorAndWeight;
+}
+
+#endif //!defined( FFXM_FSR2_UPSAMPLE_H )
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta
new file mode 100644
index 0000000..00d48d1
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/fsr2/ffxm_fsr2_upsample.h.meta
@@ -0,0 +1,67 @@
+fileFormatVersion: 2
+guid: adbae71b3f272394a895f14e3c09e3e2
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ DefaultValueInitialized: true
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta
new file mode 100644
index 0000000..484847f
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: bc1175974e28a1344bca96b5e00fc1cf
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h
new file mode 100644
index 0000000..c9322ce
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h
@@ -0,0 +1,1013 @@
+// Copyright © 2023 Advanced Micro Devices, Inc.
+// Copyright © 2024 Arm Limited.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+/// @defgroup FfxGPUSpd FidelityFX SPD
+/// FidelityFX Single Pass Downsampler 2.0 GPU documentation
+///
+/// @ingroup FfxGPUEffects
+
+/// Set up the constant values required by SPD (CPU side).
+///
+/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture
+/// @param [out] workGroupOffset GPU side: pass in as constant (index of the first 64x64 tile touched by the rect)
+/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant ([0] = thread groups per slice, [1] = number of mips)
+/// @param [in] rectInfo left, top, width, height
+/// @param [in] mips optional: if negative, calculated from the rect width and height (at most 12)
+///
+/// @ingroup FfxGPUSpd
+#if defined(FFXM_CPU)
+FFXM_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,
+                             FfxUInt32x2 workGroupOffset,
+                             FfxUInt32x2 numWorkGroupsAndMips,
+                             FfxUInt32x4 rectInfo,
+                             FfxInt32 mips)
+{
+    // determines the offset of the first tile to downsample based on
+    // left (rectInfo[0]) and top (rectInfo[1]) of the subregion.
+    workGroupOffset[0] = rectInfo[0] / 64;
+    workGroupOffset[1] = rectInfo[1] / 64;
+    // index of the last tile touched by the rect. NOTE(review): unsigned math - wraps if left + width (or top + height) is 0.
+    FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width
+    FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height
+
+    // we only need to dispatch as many thread groups as tiles we need to downsample
+    // number of tiles per slice depends on the subregion to downsample
+    dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0];
+    dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1];
+
+    // number of thread groups per slice
+    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);
+
+    if (mips >= 0)
+    {
+        numWorkGroupsAndMips[1] = FfxUInt32(mips); // caller-supplied mip count is used verbatim (no clamping)
+    }
+    else
+    {
+        // calculate based on rect width and height: floor(log2(max(width, height))), capped at 12
+        FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]);
+        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12))));
+    }
+}
+
+/// Set up the constant values required by SPD (CPU side), deriving the mip count from the rect size.
+///
+/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture
+/// @param [out] workGroupOffset GPU side: pass in as constant
+/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant
+/// @param [in] rectInfo left, top, width, height
+///
+/// @ingroup FfxGPUSpd
+FFXM_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,
+                             FfxUInt32x2 workGroupOffset,
+                             FfxUInt32x2 numWorkGroupsAndMips,
+                             FfxUInt32x4 rectInfo)
+{
+    ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); // -1: the 5-argument overload computes the mip count
+}
+#endif // #if defined(FFXM_CPU)
+
+
+//==============================================================================================================================
+// NON-PACKED VERSION
+//==============================================================================================================================
+#if defined(FFXM_GPU)
+#if defined(FFXM_SPD_PACKED_ONLY)
+// FFXM_SPD_PACKED_ONLY build: avoid compiler errors by providing no-op default implementations of the non-packed callbacks used below.
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) // stub: always returns zero
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice) // stub: always returns zero
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice) // stub: discards the value
+{
+}
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) // stub: always returns zero
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) // stub: discards the value
+{
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) // stub: ignores its inputs
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+#endif // #if FFXM_SPD_PACKED_ONLY
+
+//_____________________________________________________________/\_______________________________________________________________
+#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+//#extension GL_KHR_shader_subgroup_quad:require
+#endif
+
+void ffxSpdWorkgroupShuffleBarrier()
+{
+    FFXM_GROUP_MEMORY_BARRIER; // barrier macro defined outside this header; used in this file to order intermediate stores against the loads that follow
+}
+
+// Only last active workgroup should proceed: returns true when the calling workgroup must exit.
+bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
+{
+    // global atomic counter: incremented once per workgroup, by invocation 0 only
+    if (localInvocationIndex == 0)
+    {
+        SpdIncreaseAtomicCounter(slice);
+    }
+
+    ffxSpdWorkgroupShuffleBarrier(); // all invocations wait here before the counter value is read
+    return (SpdGetAtomicCounter() != (numWorkGroups - 1)); // presumably the pre-increment count, so numWorkGroups - 1 marks the last arrival - confirm in the callback
+}
+
+// Reduces v with the values held by the other three lanes of the quad. User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
+FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
+{
+#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat32x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4(v0, v1, v2, v3);
+
+#elif defined(FFXM_HLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+
+    // requires SM6.0
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1 = QuadReadAcrossX(v);
+    FfxFloat32x4 v2 = QuadReadAcrossY(v);
+    FfxFloat32x4 v3 = QuadReadAcrossDiagonal(v);
+    return SpdReduce4(v0, v1, v2, v3);
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat32x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat32x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4(v0, v1, v2, v3);
+    */
+#endif
+    return v; // only effective when neither wave-op branch above is compiled in: v passes through unreduced
+}
+
+FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
+{
+    // Load the four intermediate values at coordinates i0..i3 and fold them with SpdReduce4.
+    return SpdReduce4(SpdLoadIntermediate(i0.x, i0.y),
+                      SpdLoadIntermediate(i1.x, i1.y),
+                      SpdLoadIntermediate(i2.x, i2.y),
+                      SpdLoadIntermediate(i3.x, i3.y));
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{
+    // Fetch the four texels at i0..i3 through SpdLoad and fold them with SpdReduce4.
+    return SpdReduce4(SpdLoad(FfxInt32x2(i0), slice),
+                      SpdLoad(FfxInt32x2(i1), slice),
+                      SpdLoad(FfxInt32x2(i2), slice),
+                      SpdLoad(FfxInt32x2(i3), slice));
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
+{
+    return SpdReduceLoad4(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice); // 2x2 footprint whose minimum corner is base
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{
+    // Fetch the four source-image texels at i0..i3 and fold them with SpdReduce4.
+    return SpdReduce4(SpdLoadSourceImage(FfxInt32x2(i0), slice),
+                      SpdLoadSourceImage(FfxInt32x2(i1), slice),
+                      SpdLoadSourceImage(FfxInt32x2(i2), slice),
+                      SpdLoadSourceImage(FfxInt32x2(i3), slice));
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
+{
+#if !defined(SPD_LINEAR_SAMPLER)
+    return SpdReduceLoadSourceImage4(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice); // explicit 2x2 reduction starting at base
+#else
+    return SpdLoadSourceImage(FfxInt32x2(base), slice); // single fetch; presumably the callback filters the footprint itself - confirm
+#endif
+}
+
+void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{
+    FfxFloat32x4 v[4];
+    // Wave-intrinsics variant: each invocation writes four texels of output mip 0 (offsets 0/16 in x and y), each reduced from the source image.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // 'mip' holds the total number of mips requested (SpdDownsample forwards its 'mips'), not a level index
+        return;
+    // Mip 1: reduce each value across the quad; only invocations whose index is a multiple of 4 store the result.
+    v[0] = SpdReduceQuad(v[0]);
+    v[1] = SpdReduceQuad(v[1]);
+    v[2] = SpdReduceQuad(v[2]);
+    v[3] = SpdReduceQuad(v[3]);
+
+    if ((localInvocationIndex % 4) == 0)
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2, v[0]); // also kept as an intermediate value for the next level
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{
+    FfxFloat32x4 v[4];
+    // Variant without wave operations: mip 0 is written as in the intrinsics path, mip 1 is reduced through intermediate storage.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // 'mip' holds the total number of mips requested (SpdDownsample forwards its 'mips'), not a level index
+        return;
+    // Per quadrant i: every invocation stores its value, then the first 64 invocations each reduce a 2x2 neighbourhood into mip 1.
+    for (FfxUInt32 i = 0; i < 4; i++)
+    {
+        SpdStoreIntermediate(x, y, v[i]);
+        ffxSpdWorkgroupShuffleBarrier(); // stores above must complete before the reduction reads them
+        if (localInvocationIndex < 64)
+        {
+            v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        ffxSpdWorkgroupShuffleBarrier(); // reads above must complete before the next iteration overwrites the intermediate values
+    }
+    // Publish the four reduced quadrants (offsets 0/8) as intermediate values for the next level.
+    if (localInvocationIndex < 64)
+    {
+        SpdStoreIntermediate(x + 0, y + 0, v[0]);
+        SpdStoreIntermediate(x + 8, y + 0, v[1]);
+        SpdStoreIntermediate(x + 0, y + 8, v[2]);
+        SpdStoreIntermediate(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{
+#if !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); // wave operations allowed: quad-intrinsics variant
+#else
+    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); // wave operations disabled: LDS variant
+#endif
+}
+
+
+void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the intermediate values once more, stores the result at level 'mip' (tile origin workGroupID * 8) and republishes it as intermediate data.
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); // odd rows are shifted one column, matching the pattern drawn above
+    }
+#else
+    FfxFloat32x4 v = SpdLoadIntermediate(x, y);
+    v = SpdReduceQuad(v);
+    // quad index 0 stores result
+    if (localInvocationIndex % 4 == 0)
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+        SpdStoreIntermediate(x + (y / 2) % 2, y, v); // same staggered layout as the branch above, expressed in this branch's coordinates
+    }
+#endif
+}
+
+void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the staggered intermediate values written by SpdDownsampleMip_2 and stores level 'mip' (tile origin workGroupID * 4).
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 16)
+    {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        FfxFloat32x4 v =
+            SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
+        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        SpdStoreIntermediate(x * 4 + y, y * 4, v); // each row y is shifted by y columns, as drawn above
+    }
+#else
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); // same address SpdDownsampleMip_2's LDS branch writes to
+        v = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x * 2 + y / 2, y * 2, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the intermediate values written by SpdDownsampleMip_3 and stores level 'mip' (tile origin workGroupID * 2).
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 4)
+    {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+                                               FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+                                               FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+                                               FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        SpdStoreIntermediate(x + y * 2, 0, v); // the four results are packed into the first four slots of row 0
+    }
+#else
+    if (localInvocationIndex < 16)
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); // same address SpdDownsampleMip_3's LDS branch writes to
+        v = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x / 2 + y, 0, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the last four intermediate values (row 0, columns 0..3) to the single texel this workgroup stores at level 'mip'.
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 1)
+    {
+        // x x x x 0 ...
+        // 0 ...
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+        SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
+    }
+#else
+    if (localInvocationIndex < 4)
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0);
+        v = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); // one texel per workgroup, addressed by workGroupID
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
+{ // Each invocation reduces a 4x4 block read through SpdLoad into a 2x2 block of mip 6 and, if requested, one texel of mip 7.
+    FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
+    FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
+    FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v0, 6, slice);
+
+    tex = FfxInt32x2(x * 4 + 2, y * 4 + 0);
+    pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+    FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v1, 6, slice);
+
+    tex = FfxInt32x2(x * 4 + 0, y * 4 + 2);
+    pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+    FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v2, 6, slice);
+
+    tex = FfxInt32x2(x * 4 + 2, y * 4 + 2);
+    pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+    FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v3, 6, slice);
+
+    if (mips <= 7) // mip 7 was not requested
+        return;
+    // no barrier needed, working on values only from the same thread
+
+    FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3);
+    SpdStore(FfxInt32x2(x, y), v, 7, slice);
+    SpdStoreIntermediate(x, y, v); // also kept as an intermediate value for the levels computed by SpdDownsampleNextFour
+}
+
+void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
+{ // Computes up to four consecutive levels baseMip .. baseMip + 3, returning as soon as the next level is not below 'mips'.
+    if (mips <= baseMip)
+        return;
+    ffxSpdWorkgroupShuffleBarrier(); // a barrier precedes every level, after the previous level's intermediate stores
+    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+    if (mips <= baseMip + 1)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+    if (mips <= baseMip + 2)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+    if (mips <= baseMip + 3)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); // takes no x/y: it reads fixed intermediate slots
+}
+
+/// Downsamples a 64x64 tile based on the work group id (MIP levels 0 to 5).
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels (6 to 11).
+///
+/// @param [in] workGroupID index of the work group / thread group
+/// @param [in] localInvocationIndex index of the thread within the thread group in 1D
+/// @param [in] mips the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice the slice of the input texture
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{
+    // compute MIP level 0 and 1
+    FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64);
+    FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); // bit 6 of the index shifts x by 8
+    FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); // index bits 7 and up shift y in steps of 8
+    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+    // compute MIP level 2, 3, 4, 5
+    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+
+    if (mips <= 6) // levels 0..5 cover the request; skip the cross-workgroup pass
+        return;
+
+    // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+    // terminate if not, continue if yes.
+    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
+        return;
+
+    // reset the global atomic counter back to 0 for the next spd dispatch
+    SpdResetAtomicCounter(slice);
+
+    // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+    // compute MIP level 6 and 7
+    SpdDownsampleMips_6_7(x, y, mips, slice);
+
+    // compute MIP level 8, 9, 10, 11 (workGroupID is fixed to (0, 0): only one workgroup is left)
+    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
+}
+/// Downsamples a 64x64 tile based on the work group id and work group offset.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+///
+/// @param [in] workGroupID index of the work group / thread group
+/// @param [in] localInvocationIndex index of the thread within the thread group in 1D
+/// @param [in] mips the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice the slice of the input texture
+/// @param [in] workGroupOffset the work group offset, added to workGroupID. it's (0,0) in case the entire input texture is downsampled.
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{
+    SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); // shift the tile index, then run the offset-free overload
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//==============================================================================================================================
+// PACKED VERSION
+//==============================================================================================================================
+
+#if FFXM_HALF
+
+#if defined(FFXM_GLSL)
+//#extension GL_EXT_shader_subgroup_extended_types_float16:require
+#endif
+
+FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
+{ // Half-precision counterpart of SpdReduceQuad: reduces v with the values of the other three quad lanes via SpdReduce4H.
+#if defined(FFXM_GLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4H(v0, v1, v2, v3);
+#elif defined(FFXM_HLSL) && !defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+    // requires SM6.0
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1 = QuadReadAcrossX(v);
+    FfxFloat16x4 v2 = QuadReadAcrossY(v);
+    FfxFloat16x4 v3 = QuadReadAcrossDiagonal(v);
+    return SpdReduce4H(v0, v1, v2, v3);
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat16x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat16x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4H(v0, v1, v2, v3);
+    */
+#endif
+    return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); // NOTE(review): the fallback yields zero here, whereas SpdReduceQuad returns v unchanged
+}
+
+FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
+{
+    // Load the four half-precision intermediate values at i0..i3 and fold them with SpdReduce4H.
+    return SpdReduce4H(SpdLoadIntermediateH(i0.x, i0.y),
+                       SpdLoadIntermediateH(i1.x, i1.y),
+                       SpdLoadIntermediateH(i2.x, i2.y),
+                       SpdLoadIntermediateH(i3.x, i3.y));
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{
+    // Fetch the four texels at i0..i3 through SpdLoadH and fold them with SpdReduce4H.
+    return SpdReduce4H(SpdLoadH(FfxInt32x2(i0), slice),
+                       SpdLoadH(FfxInt32x2(i1), slice),
+                       SpdLoadH(FfxInt32x2(i2), slice),
+                       SpdLoadH(FfxInt32x2(i3), slice));
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
+{
+    return SpdReduceLoad4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice); // 2x2 footprint whose minimum corner is base
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{
+    // Fetch the four source-image texels at i0..i3 and fold them with SpdReduce4H.
+    return SpdReduce4H(SpdLoadSourceImageH(FfxInt32x2(i0), slice),
+                       SpdLoadSourceImageH(FfxInt32x2(i1), slice),
+                       SpdLoadSourceImageH(FfxInt32x2(i2), slice),
+                       SpdLoadSourceImageH(FfxInt32x2(i3), slice));
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
+{
+#if !defined(SPD_LINEAR_SAMPLER)
+    return SpdReduceLoadSourceImage4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice); // explicit 2x2 reduction starting at base
+#else
+    return SpdLoadSourceImageH(FfxInt32x2(base), slice); // single fetch; presumably the callback filters the footprint itself - confirm
+#endif
+}
+
+void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{
+    FfxFloat16x4 v[4];
+    // Half-precision wave-intrinsics variant: each invocation writes four texels of output mip 0 (offsets 0/16 in x and y), each reduced from the source image.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+
+    if (mips <= 1) // only mip 0 was requested
+        return;
+    // Mip 1: reduce each value across the quad; only invocations whose index is a multiple of 4 store the result.
+    v[0] = SpdReduceQuadH(v[0]);
+    v[1] = SpdReduceQuadH(v[1]);
+    v[2] = SpdReduceQuadH(v[2]);
+    v[3] = SpdReduceQuadH(v[3]);
+
+    if ((localInvocationIndex % 4) == 0)
+    {
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2, v[0]); // also kept as an intermediate value for the next level
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{
+    FfxFloat16x4 v[4];
+    // Half-precision variant without wave operations: mip 0 is written as in the intrinsics path, mip 1 is reduced through intermediate storage.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+
+    if (mips <= 1) // only mip 0 was requested
+        return;
+    // Per quadrant i: every invocation stores its value, then the first 64 invocations each reduce a 2x2 neighbourhood into mip 1.
+    for (FfxInt32 i = 0; i < 4; i++)
+    {
+        SpdStoreIntermediateH(x, y, v[i]);
+        ffxSpdWorkgroupShuffleBarrier(); // stores above must complete before the reduction reads them
+        if (localInvocationIndex < 64)
+        {
+            v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        ffxSpdWorkgroupShuffleBarrier(); // reads above must complete before the next iteration overwrites the intermediate values
+    }
+    // Publish the four reduced quadrants (offsets 0/8) as intermediate values for the next level.
+    if (localInvocationIndex < 64)
+    {
+        SpdStoreIntermediateH(x + 0, y + 0, v[0]);
+        SpdStoreIntermediateH(x + 8, y + 0, v[1]);
+        SpdStoreIntermediateH(x + 0, y + 8, v[2]);
+        SpdStoreIntermediateH(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) // Compile-time selection between the two MIP 0/1 implementations; arguments are forwarded unchanged.
+{
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+ SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); // variant that reduces through the intermediate store
+#else
+ SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); // variant that reduces with SpdReduceQuadH
+#endif
+}
+
+
+void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // Reduces the intermediate values by one level, writes them to output level `mip` and keeps them for the next level.
+{
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+ if (localInvocationIndex < 64) // on this path only the first 64 invocations do work
+ {
+ FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+ // store to LDS, try to reduce bank conflicts
+ // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+ // ...
+ // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+ SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); // odd y shifts the column by one, producing the staggered layout drawn above
+ }
+#else // quad-reduction path: every invocation loads one value
+ FfxFloat16x4 v = SpdLoadIntermediateH(x, y);
+ v = SpdReduceQuadH(v);
+ // quad index 0 stores result
+ if (localInvocationIndex % 4 == 0)
+ {
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+ SpdStoreIntermediateH(x + (y / 2) % 2, y, v); // same one-column stagger, keyed on y / 2 here
+ }
+#endif
+}
+
+void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // Next reduction level: reads the staggered layout left by SpdDownsampleMip_2H and writes output level `mip`.
+{
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+ if (localInvocationIndex < 16) // on this path only the first 16 invocations do work
+ {
+ // x 0 x 0
+ // 0 0 0 0
+ // 0 x 0 x
+ // 0 0 0 0
+ FfxFloat16x4 v =
+ SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); // the +1 on the second row follows the stagger shown above
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+ // store to LDS
+ // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+ // ...
+ // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+ // ...
+ // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+ // ...
+ SpdStoreIntermediateH(x * 4 + y, y * 4, v); // column shifts by y, giving the diagonal layout drawn above
+ }
+#else // quad-reduction path
+ if (localInvocationIndex < 64)
+ {
+ FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); // same address expression the LDS path of SpdDownsampleMip_2H stores to
+ v = SpdReduceQuadH(v);
+ // quad index 0 stores result
+ if (localInvocationIndex % 4 == 0)
+ {
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+ SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v);
+ }
+ }
+#endif
+}
+
+void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // Next reduction level: writes a 2x2 footprint per work group to output level `mip` and packs the results into row 0 of the intermediate store.
+{
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+ if (localInvocationIndex < 4) // on this path only the first 4 invocations do work
+ {
+ // x 0 0 0 x 0 0 0
+ // ...
+ // 0 x 0 0 0 x 0 0
+ FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+ FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+ FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+ FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+ // store to LDS
+ // x x x x 0 ...
+ // 0 ...
+ SpdStoreIntermediateH(x + y * 2, 0, v); // linearises the results into the first columns of row 0
+ }
+#else // quad-reduction path
+ if (localInvocationIndex < 16)
+ {
+ FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); // same address expression the LDS path of SpdDownsampleMip_3H stores to
+ v = SpdReduceQuadH(v);
+ // quad index 0 stores result
+ if (localInvocationIndex % 4 == 0)
+ {
+ SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+ SpdStoreIntermediateH(x / 2 + y, 0, v); // results go to row 0, read back by SpdDownsampleMip_5H
+ }
+ }
+#endif
+}
+
+void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // Last per-work-group level: reduces the four values in row 0 of the intermediate store to one texel at workGroupID.xy of level `mip`.
+{
+#if defined(FFXM_SPD_NO_WAVE_OPERATIONS)
+ if (localInvocationIndex < 1) // a single invocation reduces the four values
+ {
+ // x x x x 0 ...
+ // 0 ...
+ FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+ SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+ }
+#else // quad-reduction path
+ if (localInvocationIndex < 4)
+ {
+ FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0); // each of the four invocations loads one entry of row 0
+ v = SpdReduceQuadH(v);
+ // quad index 0 stores result
+ if (localInvocationIndex % 4 == 0)
+ {
+ SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+ }
+ }
+#endif
+}
+
+void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) // Writes a 2x2 block of output level 6 per invocation and, when mips >= 8, the matching single texel of level 7.
+{
+ FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); // top-left sub-block: source offset (+0, +0), destination offset (+0, +0)
+ FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
+ FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice);
+ SpdStoreH(pix, v0, 6, slice);
+
+ tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); // top-right sub-block: source offset (+2, +0), destination offset (+1, +0)
+ pix = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+ FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice);
+ SpdStoreH(pix, v1, 6, slice);
+
+ tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); // bottom-left sub-block: source offset (+0, +2), destination offset (+0, +1)
+ pix = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+ FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice);
+ SpdStoreH(pix, v2, 6, slice);
+
+ tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); // bottom-right sub-block: source offset (+2, +2), destination offset (+1, +1)
+ pix = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+ FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice);
+ SpdStoreH(pix, v3, 6, slice);
+
+ if (mips < 8) // level 7 was not requested
+ return;
+ // no barrier needed, working on values only from the same thread
+
+ FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3);
+ SpdStoreH(FfxInt32x2(x, y), v, 7, slice);
+ SpdStoreIntermediateH(x, y, v); // kept in the intermediate store for the levels computed after this call
+}
+
+void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) // Runs up to four successive levels (baseMip .. baseMip + 3), stopping at the first level that `mips` does not include.
+{
+ if (mips <= baseMip) // nothing requested at or beyond baseMip
+ return;
+ ffxSpdWorkgroupShuffleBarrier(); // each level is preceded by a barrier call so it sees the previous level's intermediate stores (presumed purpose - confirm in its definition)
+ SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+ if (mips <= baseMip + 1)
+ return;
+ ffxSpdWorkgroupShuffleBarrier();
+ SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+ if (mips <= baseMip + 2)
+ return;
+ ffxSpdWorkgroupShuffleBarrier();
+ SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+ if (mips <= baseMip + 3)
+ return;
+ ffxSpdWorkgroupShuffleBarrier();
+ SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); // the last level needs no x/y: it always reads row 0 of the intermediate store
+}
+
+/// Downsamples a 64x64 tile based on the work group id and work group offset.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+/// Uses half types.
+///
+/// @param [in] workGroupID index of the work group / thread group
+/// @param [in] localInvocationIndex index of the thread within the thread group in 1D
+/// @param [in] mips the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice the slice of the input texture
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{
+ FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); // 2D position within the invocation's group of 64 (helper defined outside this view)
+ FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); // odd groups of 64 are shifted 8 to the right
+ FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); // every further pair of groups is shifted 8 down
+
+ // compute MIP level 0 and 1
+ SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+ // compute MIP level 2, 3, 4, 5
+ SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+
+ if (mips < 7) // no level beyond the per-work-group ones was requested
+ return;
+
+ // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+ // terminate if not, continue if yes.
+ if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
+ return;
+
+ // reset the global atomic counter back to 0 for the next spd dispatch
+ SpdResetAtomicCounter(slice);
+
+ // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+ // compute MIP level 6 and 7
+ SpdDownsampleMips_6_7H(x, y, mips, slice);
+
+ // compute MIP level 8, 9, 10, 11
+ SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); // work group id is (0, 0) because a single work group produces these levels
+}
+
+/// Downsamples a 64x64 tile based on the work group id and work group offset.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+/// Uses half types. This overload only adds workGroupOffset to workGroupID and forwards to the five-argument overload.
+///
+/// @param [in] workGroupID index of the work group / thread group
+/// @param [in] localInvocationIndex index of the thread within the thread group in 1D
+/// @param [in] mips the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice the slice of the input texture
+/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled.
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{
+ SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); // offset is applied once here; the callee treats the sum as the work group id
+}
+
+#endif // #if FFXM_HALF
+#endif // #if defined(FFXM_GPU)
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta
new file mode 100644
index 0000000..a2617bf
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASR/Shaders/shaders/spd/ffxm_spd.h.meta
@@ -0,0 +1,76 @@
+fileFormatVersion: 2
+guid: 8e7a668559e3ae0419884ca7d3534a47
+PluginImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ iconMap: {}
+ executionOrder: {}
+ defineConstraints: []
+ isPreloaded: 0
+ isOverridable: 1
+ isExplicitlyReferenced: 0
+ validateReferences: 1
+ platformData:
+ - first:
+ : Any
+ second:
+ enabled: 0
+ settings:
+ Exclude Android: 1
+ Exclude Editor: 1
+ Exclude GameCoreScarlett: 1
+ Exclude GameCoreXboxOne: 1
+ Exclude Linux64: 1
+ Exclude OSXUniversal: 1
+ Exclude PS4: 1
+ Exclude PS5: 1
+ Exclude WebGL: 1
+ Exclude Win: 1
+ Exclude Win64: 1
+ - first:
+ Android: Android
+ second:
+ enabled: 0
+ settings:
+ AndroidSharedLibraryType: Executable
+ CPU: ARMv7
+ - first:
+ Any:
+ second:
+ enabled: 0
+ settings: {}
+ - first:
+ Editor: Editor
+ second:
+ enabled: 0
+ settings:
+ CPU: AnyCPU
+ DefaultValueInitialized: true
+ OS: AnyOS
+ - first:
+ Standalone: Linux64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: OSXUniversal
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ - first:
+ Standalone: Win64
+ second:
+ enabled: 0
+ settings:
+ CPU: None
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs
new file mode 100644
index 0000000..e0a8b62
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs
@@ -0,0 +1,132 @@
+using ArmASR;
+using UnityEngine.Experimental.Rendering;
+
+namespace UnityEngine.Rendering.PostProcessing
+{
+ internal abstract class ASRUpscaler: Upscaler // Shared Arm ASR integration; the concrete subclasses only choose the Asr.Variant.
+ {
+ public static bool IsSupported => SystemInfo.supportsComputeShaders; // gated solely on compute shader support
+
+ protected abstract Asr.Variant Variant { get; } // variant handed to Asr.CreateContext
+
+ private AsrContext _asrContext; // null until CreateContext runs, and again after DestroyContext
+
+ private Asr.DispatchDescription _dispatchDescription = new(); // refilled on every Render call
+ private Asr.GenerateReactiveDescription _genReactiveDescription = new(); // refilled whenever the reactive mask is auto-generated
+
+ public override void CreateContext(PostProcessRenderContext context, Upscaling config) // Translates the upscaling settings into initialization flags and creates the ASR context.
+ {
+ // Initialize ASR context
+ Asr.InitializationFlags flags = config.enableFP16 ? Asr.InitializationFlags.EnableFP16Usage : 0;
+ if (context.camera.allowHDR) flags |= Asr.InitializationFlags.EnableHighDynamicRange;
+ if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Asr.InitializationFlags.EnableAutoExposure;
+ if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Asr.InitializationFlags.EnableDynamicResolution;
+
+ _asrContext = Asr.CreateContext(Variant, config.UpscaleSize, config.MaxRenderSize, context.resources.asrUpscalerShaders, flags);
+ }
+
+ public override void DestroyContext() // Destroys the ASR context if one exists; safe to call when none was created.
+ {
+ base.DestroyContext();
+
+ if (_asrContext != null)
+ {
+ _asrContext.Destroy();
+ _asrContext = null;
+ }
+ }
+
+ public override void Render(PostProcessRenderContext context, Upscaling config) // Records the optional reactive-mask pass and the main ASR dispatch into context.command.
+ {
+ var cmd = context.command;
+ cmd.BeginSample("ASR");
+
+ SetupDispatchDescription(context, config);
+
+ if (config.autoGenerateReactiveMask)
+ {
+ SetupAutoReactiveDescription(context, config); // also points OutReactive at AsrShaderIDs.UavAutoReactive
+
+ var scaledRenderSize = _genReactiveDescription.RenderSize;
+ cmd.GetTemporaryRT(AsrShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1); // NOTE(review): requested without enableRandomWrite although the ID is named Uav* - confirm how GenerateReactiveMask writes it
+ _asrContext.GenerateReactiveMask(_genReactiveDescription, cmd);
+ _dispatchDescription.Reactive = new ResourceView(AsrShaderIDs.UavAutoReactive); // replaces any config.reactiveMask assigned during setup
+ }
+
+ _asrContext.Dispatch(_dispatchDescription, cmd);
+ if (config.autoGenerateReactiveMask) cmd.ReleaseTemporaryRT(AsrShaderIDs.UavAutoReactive); // release the mask explicitly once the dispatch that reads it has been recorded
+
+ cmd.EndSample("ASR");
+ }
+
+ private void SetupDispatchDescription(PostProcessRenderContext context, Upscaling config) // Fills _dispatchDescription from the render context, camera and upscaling settings.
+ {
+ var camera = context.camera;
+
+ // Set up the main ASR dispatch parameters
+ _dispatchDescription.Color = new ResourceView(context.source);
+ _dispatchDescription.Depth = new ResourceView(Upscaling.GetDepthTexture(camera), RenderTextureSubElement.Depth);
+ _dispatchDescription.MotionVectors = new ResourceView(BuiltinRenderTextureType.MotionVectors);
+ _dispatchDescription.Exposure = ResourceView.Unassigned; // optional inputs start unassigned and are filled in below only when configured
+ _dispatchDescription.Reactive = ResourceView.Unassigned;
+ _dispatchDescription.TransparencyAndComposition = ResourceView.Unassigned;
+
+ if (config.exposureSource == Upscaling.ExposureSource.Manual && config.exposure != null) _dispatchDescription.Exposure = new ResourceView(config.exposure);
+ if (config.exposureSource == Upscaling.ExposureSource.Unity) _dispatchDescription.Exposure = new ResourceView(context.autoExposureTexture);
+ if (config.reactiveMask != null) _dispatchDescription.Reactive = new ResourceView(config.reactiveMask);
+ if (config.transparencyAndCompositionMask != null) _dispatchDescription.TransparencyAndComposition = new ResourceView(config.transparencyAndCompositionMask);
+
+ var scaledRenderSize = config.GetScaledRenderSize(camera);
+
+ _dispatchDescription.Output = new ResourceView(context.destination);
+ _dispatchDescription.PreExposure = config.preExposure;
+ _dispatchDescription.EnableSharpening = config.performSharpenPass;
+ _dispatchDescription.Sharpness = config.sharpness;
+ _dispatchDescription.JitterOffset = config.JitterOffset;
+ _dispatchDescription.MotionVectorScale.x = -scaledRenderSize.x; // negated render size on both axes
+ _dispatchDescription.MotionVectorScale.y = -scaledRenderSize.y;
+ _dispatchDescription.RenderSize = scaledRenderSize;
+ _dispatchDescription.InputResourceSize = scaledRenderSize;
+ _dispatchDescription.FrameTimeDelta = Time.unscaledDeltaTime;
+ _dispatchDescription.CameraNear = camera.nearClipPlane;
+ _dispatchDescription.CameraFar = camera.farClipPlane;
+ _dispatchDescription.CameraFovAngleVertical = camera.fieldOfView * Mathf.Deg2Rad;
+ _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity
+ _dispatchDescription.Reset = config.Reset;
+
+ if (SystemInfo.usesReversedZBuffer)
+ {
+ // Swap the near and far clip plane distances as FSR2 expects this when using inverted depth
+ (_dispatchDescription.CameraNear, _dispatchDescription.CameraFar) = (_dispatchDescription.CameraFar, _dispatchDescription.CameraNear);
+ }
+ }
+
+ private void SetupAutoReactiveDescription(PostProcessRenderContext context, Upscaling config) // Fills _genReactiveDescription for the auto-generated reactive mask pass.
+ {
+ // Set up the parameters to auto-generate a reactive mask
+ _genReactiveDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly);
+ _genReactiveDescription.ColorPreUpscale = new ResourceView(context.source);
+ _genReactiveDescription.OutReactive = new ResourceView(AsrShaderIDs.UavAutoReactive); // the temporary RT itself is requested by Render
+ _genReactiveDescription.RenderSize = config.GetScaledRenderSize(context.camera);
+ _genReactiveDescription.Scale = config.generateReactiveParameters.scale;
+ _genReactiveDescription.CutoffThreshold = config.generateReactiveParameters.cutoffThreshold;
+ _genReactiveDescription.BinaryValue = config.generateReactiveParameters.binaryValue;
+ _genReactiveDescription.Flags = (Asr.GenerateReactiveFlags)config.generateReactiveParameters.flags;
+ }
+ }
+
+ internal class ASRUpscaler_Quality : ASRUpscaler // ASRUpscaler bound to Asr.Variant.Quality
+ {
+ protected override Asr.Variant Variant => Asr.Variant.Quality;
+ }
+
+ internal class ASRUpscaler_Balanced : ASRUpscaler // ASRUpscaler bound to Asr.Variant.Balanced
+ {
+ protected override Asr.Variant Variant => Asr.Variant.Balanced;
+ }
+
+ internal class ASRUpscaler_Performance : ASRUpscaler // ASRUpscaler bound to Asr.Variant.Performance
+ {
+ protected override Asr.Variant Variant => Asr.Variant.Performance;
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta
new file mode 100644
index 0000000..a6f3e44
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/ASRUpscaler.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: efc2b52b928eb184da8528721dc1503b
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs
index 00f5f8c..cf8a280 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/SGSR2.cs
@@ -3,21 +3,28 @@ using System.Collections;
using System.Collections.Generic;
using UnityEngine;
-public class SGSR2
+public static class SGSR2 // static: in this hunk the type only nests Params, Shaders and ComputeShaders
{
[Serializable]
public struct Params
{
public Vector2Int renderSize;
public Vector2Int displaySize;
+
public Vector2 renderSizeRcp;
public Vector2 displaySizeRcp;
+
public Vector2 jitterOffset;
+ public Vector2 padding1; // NOTE(review): appears to pad jitterOffset to a 16-byte row so clipToPrevClip starts on a 16-byte boundary - confirm against the shader-side layout
+
public Matrix4x4 clipToPrevClip;
+
public float preExposure;
public float cameraFovAngleHor;
public float cameraNear;
public float minLerpContribution;
+
+ public Vector2 scaleRatio; // NOTE(review): presumably the display-to-render size ratio - confirm where Params is filled in
public uint bSameCamera;
public uint reset;
}
@@ -25,10 +32,28 @@ public class SGSR2
[Serializable]
public class Shaders
{
- public ComputeShader convert;
-
- public ComputeShader activate;
-
- public ComputeShader upscale;
+ public Shader twoPassFragment; // non-compute shader; the compute shaders now live in ComputeShaders below
+ }
+
+ [Serializable]
+ public class ComputeShaders // compute shader references, grouped by pass count
+ {
+ public TwoPassCompute twoPassCompute;
+ public ThreePassCompute threePassCompute;
+
+ [Serializable]
+ public class TwoPassCompute // convert + upscale
+ {
+ public ComputeShader convert;
+ public ComputeShader upscale;
+ }
+
+ [Serializable]
+ public class ThreePassCompute // convert + activate + upscale (the three fields previously held directly by Shaders)
+ {
+ public ComputeShader convert;
+ public ComputeShader activate;
+ public ComputeShader upscale;
+ }
}
}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta
new file mode 100644
index 0000000..12e48c2
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: def18d58a2ff64f44a2d9f73e487a689
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute
new file mode 100644
index 0000000..5bfb16e
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute
@@ -0,0 +1,109 @@
+#pragma kernel CS
+#include "../sgsr2_birp.hlsl"
+#include "../sgsr2_common.hlsl"
+
+//============================================================================================================
+//
+//
+// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+//============================================================================================================
+
+TEXTURE2D_X(InputColor) : register(t0);
+TYPED_TEXTURE2D_X(float, InputDepth) : register(t1);
+TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t2);
+RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0);
+RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1);
+
+[numthreads(8, 8, 1)]
+void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) // Convert pass: per pixel writes (motion.xy, depth-clip weight, tonemap divisor) to MotionDepthClipAlphaBuffer and a packed YCoCg colour to YCoCgColor.
+{
+ UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z);
+
+ half Exposure_co_rcp = preExposure; // NOTE(review): named like a reciprocal but assigned preExposure unchanged - confirm this is intended
+ float2 ViewportSizeInverse = displaySizeRcp.xy; // NOTE(review): display-size reciprocal is used while InputPos indexes the input textures directly - confirm against the dispatch resolution
+ uint2 InputPos = gl_GlobalInvocationID.xy;
+
+ float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * ViewportSizeInverse;
+ float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse; // UV at the centre of this thread's texel
+
+ //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
+ //FindNearestDepth
+
+ float4 topleft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord); // four depth gathers: at gatherCoord and shifted two texels in x, in y, and in both
+ float2 v10 = float2(ViewportSizeInverse.x*2.0, 0.0);
+ float4 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v10));
+ float2 v12 = float2(0.0, ViewportSizeInverse.y*2.0);
+ float4 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v12));
+ float2 v14 = float2(ViewportSizeInverse.x*2.0, ViewportSizeInverse.y*2.0);
+ float4 bottomRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v14));
+ float maxC = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topleft.y,topRight.x),bottomLeft.z),bottomRight.w); // nearest of one component taken from each gather
+ float topleft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topleft.y,topleft.x),topleft.z),topleft.w); // nearest of all four components of the first gather
+ float topLeftMax9 = DEPTH_NEAREST(bottomLeft.w,DEPTH_NEAREST(DEPTH_NEAREST(maxC,topleft4),topRight.w)); // nearest depth used for the reprojection fallback below
+
+ float depthclip = 0.0;
+ if (DEPTH_CLIP(maxC)) // weight stays 0 unless DEPTH_CLIP accepts the depth (macro defined in the included headers)
+ {
+ float topRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topRight.y,topRight.x),topRight.z),topRight.w);
+ float bottomLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(bottomLeft.y,bottomLeft.x),bottomLeft.z),bottomLeft.w);
+ float bottomRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(bottomRight.y,bottomRight.x),bottomRight.z),bottomRight.w);
+
+ float Wdepth = 0.0;
+ float Ksep = 1.37e-05f;
+ float Kfov = cameraFovAngleHor;
+ float diagonal_length = length(float2(renderSize));
+ float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length;
+
+ float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC);
+ Wdepth += clamp((Depthsep / (abs(maxC - topleft4) + EPSILON)), 0.0, 1.0); // each quadrant adds at most 1; EPSILON guards the division
+ Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0);
+ Wdepth += clamp((Depthsep / (abs(maxC - bottomLeft4) + EPSILON)), 0.0, 1.0);
+ Wdepth += clamp((Depthsep / (abs(maxC - bottomRight4) + EPSILON)), 0.0, 1.0);
+ depthclip = clamp(1.0f - Wdepth*0.25, 0.0, 1.0); // one minus the average of the four quadrant terms
+ }
+
+ //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion
+
+ float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos);
+
+ float2 motion;
+ if (any(abs(EncodedVelocity) > 0.0)) // a non-zero velocity texel is decoded directly
+ {
+ motion = decodeVelocityFromTexture(EncodedVelocity.xy);
+ }
+ else // otherwise motion is derived by reprojecting this pixel with clipToPrevClip
+ {
+#ifdef REQUEST_NDC_Y_UP
+ float2 ScreenPos = float2(2.0f * ViewportUV.x - 1.0f, 1.0f - 2.0f * ViewportUV.y);
+#else
+ float2 ScreenPos = float2(2.0f * ViewportUV - 1.0f);
+#endif
+ float3 Position = float3(ScreenPos, topLeftMax9); //this_clip
+ float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x)));
+ float2 PreScreen = PreClip.xy / PreClip.w;
+ motion = Position.xy - PreScreen;
+ }
+
+ ////////////compute luma
+ half3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz;
+
+ ///simple tonemap
+ float ColorMax = max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + Exposure_co_rcp;
+ Colorrgb /= ColorMax; // the divisor is also written to the output's w channel below
+
+ float3 Colorycocg;
+ Colorycocg.x = 0.25 * (Colorrgb.x + 2.0 * Colorrgb.y + Colorrgb.z);
+ Colorycocg.y = clamp(0.5 * Colorrgb.x + 0.5 - 0.5 * Colorrgb.z, 0.0, 1.0);
+ Colorycocg.z = clamp(Colorycocg.x + Colorycocg.y - Colorrgb.x, 0.0, 1.0);
+
+ //now color YCoCG all in the range of [0,1]
+ uint x11 = uint(Colorycocg.x * 2047.5); // quantised to 11 bits
+ uint y11 = uint(Colorycocg.y * 2047.5); // quantised to 11 bits
+ uint z10 = uint(Colorycocg.z * 1023.5); // quantised to 10 bits
+
+ YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; // packed 11:11:10 with x in the top bits
+
+ half4 v29 = half4(motion, depthclip, ColorMax);
+ MotionDepthClipAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = v29;
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta
new file mode 100644
index 0000000..6b0cc00
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_convert.compute.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 073ee927fbee25841a31cf364834071c
+ComputeShaderImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute
new file mode 100644
index 0000000..53be6e6
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute
@@ -0,0 +1,295 @@
+#pragma kernel CS
+//#pragma enable_d3d11_debug_symbols
+#include "../sgsr2_birp.hlsl"
+#include "../sgsr2_common.hlsl"
+
+//============================================================================================================
+//
+//
+// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+//============================================================================================================
+
+TEXTURE2D_X(PrevHistoryOutput) : register(t0); // sampled at the reprojected UV in CS; presumably last frame's HistoryOutput - confirm on the C# side
+TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1); // read by CS as xy = motion, z = depth factor, w = ColorMax
+TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2); // packed colour, unpacked in CS with DecodeColor()
+RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0); // final upscaled colour, written once per thread at the end of CS
+RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1); // blended colour in xyz plus Wfactor in w, written by CS
+
+[numthreads(8, 8, 1)] // one thread per output pixel: gl_GlobalInvocationID.xy indexes both UAVs written below
+void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) // upscale kernel: reproject history, resample the current frame, clamp history to the neighbourhood box, blend, write outputs
+{
+ UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z);
+
+ float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99); //Biasmax_viewportXScale
+ float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0));
+ float f2 = preExposure; //1.0; //preExposure -- NOTE(review): f2 is not referenced again in this kernel
+ float2 HistoryInfoViewportSizeInverse = displaySizeRcp;
+ float2 HistoryInfoViewportSize = float2(displaySize);
+ float2 InputJitter = jitterOffset;
+ float2 InputInfoViewportSize = float2(renderSize);
+ float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse; // centre of this output pixel as a UV
+ float2 Jitteruv;
+ Jitteruv.x = clamp(Hruv.x + (InputJitter.x * renderSizeRcp.x), 0.0, 1.0);
+ Jitteruv.y = clamp(Hruv.y + (InputJitter.y * renderSizeRcp.y), 0.0, 1.0);
+
+ int2 InputPos = int2(Jitteruv * InputInfoViewportSize); // input texel that the jittered UV falls into
+ float4 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0).xyzw;
+ float2 Motion = mda.xy;
+
+ ///ScreenPosToViewportScale&Bias
+ float2 PrevUV;
+ PrevUV.x = clamp(-0.5 * Motion.x + Hruv.x, 0.0, 1.0);
+#ifdef REQUEST_NDC_Y_UP
+ PrevUV.y = clamp(0.5 * Motion.y + Hruv.y, 0.0, 1.0);
+#else
+ PrevUV.y = clamp(-0.5 * Motion.y + Hruv.y, 0.0, 1.0);
+#endif
+
+ float depthfactor = mda.z;
+ float ColorMax = mda.w;
+
+ float4 History = SAMPLE_TEXTURE2D_X_LOD(PrevHistoryOutput, S_LINEAR_CLAMP, PrevUV, 0);
+ float3 HistoryColor = History.xyz;
+ float Historyw = History.w;
+ float Wfactor = clamp(abs(Historyw), 0.0, 1.0);
+
+ /////upsample and compute box
+ float4 Upsampledcw = 0.0f; // xyz accumulates weight * colour, w accumulates the weights
+ float kernelfactor = clamp(Wfactor + float(reset), 0.0, 1.0);
+ float biasmax = Biasmax_viewportXScale - Biasmax_viewportXScale * kernelfactor;
+ float biasmin = max(1.0f, 0.3 + 0.3 * biasmax);
+ float biasfactor = max(0.25f * depthfactor, kernelfactor);
+ float kernelbias = lerp(biasmax, biasmin, biasfactor);
+ float motion_viewport_len = length(Motion * HistoryInfoViewportSize);
+ float curvebias = lerp(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0));
+
+ float3 rectboxcenter = 0.0f;
+ float3 rectboxvar = 0.0f;
+ float rectboxweight = 0.0f;
+ float2 srcpos = float2(InputPos) + 0.5f - InputJitter;
+ float2 srcOutputPos = Hruv * InputInfoViewportSize;
+
+ kernelbias *= 0.5f;
+ float kernelbias2 = kernelbias * kernelbias;
+ float2 srcpos_srcOutputPos = srcpos - srcOutputPos; // offset from the output sample position to the centre tap, in input texels
+
+ int2 InputPosBtmRight = 1 + InputPos;
+ float2 gatherCoord = float2(InputPos) * renderSizeRcp;
+ uint btmRight = LOAD_TEXTURE2D_X(YCoCgColor, InputPosBtmRight).x;
+ uint4 topleft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord);
+ uint2 topRight = 0;
+ uint2 bottomLeft = 0;
+
+ uint sameCameraFrmNum = bSameCamera;
+
+ if (sameCameraFrmNum!=0u) // all nine taps are used below, so fetch both components of each extra gather
+ {
+ topRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz;
+ bottomLeft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(0.0, renderSizeRcp.y)).xy;
+ }
+ else // only bottomLeft.y and topRight.x are consumed by the five unconditional taps
+ {
+ uint2 btmRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(renderSizeRcp.x, renderSizeRcp.y)).xz; // NOTE: shadows the outer uint btmRight inside this scope only
+ bottomLeft.y = btmRight.x;
+ topRight.x = btmRight.y;
+ }
+
+ float3 rectboxmin;
+ float3 rectboxmax;
+ { // tap offset (0, +1): also seeds rectboxmin / rectboxmax
+ float3 samplecolor = DecodeColor(bottomLeft.y);
+ float2 baseoffset = srcpos_srcOutputPos + float2(0.0, 1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = samplecolor;
+ rectboxmax = samplecolor;
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // tap offset (+1, 0)
+ float3 samplecolor = DecodeColor(topRight.x);
+ float2 baseoffset = srcpos_srcOutputPos + float2(1.0, 0.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // tap offset (-1, 0)
+ float3 samplecolor = DecodeColor(topleft.x);
+ float2 baseoffset = srcpos_srcOutputPos + float2(-1.0, 0.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // centre tap, offset (0, 0)
+ float3 samplecolor = DecodeColor(topleft.y);
+ float2 baseoffset = srcpos_srcOutputPos;
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // tap offset (0, -1)
+ float3 samplecolor = DecodeColor(topleft.z);
+ float2 baseoffset = srcpos_srcOutputPos + float2(0.0, -1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+
+ if (sameCameraFrmNum!=0u) // the four diagonal taps are accumulated only when bSameCamera is non-zero
+ {
+ { // tap offset (+1, +1)
+ float3 samplecolor = DecodeColor(btmRight);
+ float2 baseoffset = srcpos_srcOutputPos + float2(1.0, 1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // tap offset (-1, +1)
+ float3 samplecolor = DecodeColor(bottomLeft.x);
+ float2 baseoffset = srcpos_srcOutputPos + float2(-1.0, 1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ { // tap offset (+1, -1)
+ float3 samplecolor = DecodeColor(topRight.y);
+ float2 baseoffset = srcpos_srcOutputPos + float2(1.0, -1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+
+ { // tap offset (-1, -1)
+ float3 samplecolor = DecodeColor(topleft.w);
+ float2 baseoffset = srcpos_srcOutputPos + float2(-1.0, -1.0);
+ float baseoffset_dot = dot(baseoffset, baseoffset);
+ float base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ float weight = FastLanczos(base);
+ Upsampledcw += float4(samplecolor * weight, weight);
+ float boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ float3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ }
+
+ rectboxweight = 1.0 / rectboxweight; // normalise the box sums by the accumulated box weight
+ rectboxcenter *= rectboxweight;
+ rectboxvar *= rectboxweight;
+ rectboxvar = sqrt(abs(rectboxvar - rectboxcenter * rectboxcenter)); // sqrt(|E[c^2] - E[c]^2|) per channel
+
+ Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.05f, rectboxmax+0.05f); // weighted average, limited to the tap min/max +/- 0.05
+ Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ;
+
+ float OneMinusWfactor = 1.0f - Wfactor;
+
+ float baseupdate = OneMinusWfactor - OneMinusWfactor * depthfactor;
+ baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0)));
+ baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0)));
+ float basealpha = baseupdate;
+
+ float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0));
+ float boxsize = lerp(scalefactor, 1.0f, boxscale);
+ float3 sboxvar = rectboxvar * boxsize;
+ float3 boxmin = rectboxcenter - sboxvar;
+ float3 boxmax = rectboxcenter + sboxvar;
+ rectboxmax = min(rectboxmax, boxmax); // intersect the min/max box with the centre +/- scaled deviation box
+ rectboxmin = max(rectboxmin, boxmin);
+
+ float3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax);
+ float startLerpValue = minLerpContribution; //MinLerpContribution; //MinLerpContribution;
+ if ((abs(mda.x) + abs(mda.y)) > 0.000001) startLerpValue = 0.0; // any motion: out-of-box history is fully replaced by its clamped value
+ float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? startLerpValue : 1.0f;
+
+ HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0));
+ float basemin = min(basealpha, 0.1f);
+ basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0));
+
+ ////blend color
+ float alphasum = max(EPSILON, basealpha + Upsampledcw.w);
+ float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0); // adding reset pushes alpha towards 1, i.e. towards the current-frame sample
+ Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha);
+
+ HistoryOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = float4(Upsampledcw.xyz, Wfactor); // history is stored before the conversion to RGB below
+
+ ////ycocg to rgb
+ float x_z = Upsampledcw.x - Upsampledcw.z;
+ Upsampledcw.xyz = float3(
+ clamp(x_z + Upsampledcw.y, 0.0, 1.0),
+ clamp(Upsampledcw.x + Upsampledcw.z, 0.0, 1.0),
+ clamp(x_z - Upsampledcw.y, 0.0, 1.0));
+
+ float compMax = max(Upsampledcw.x, Upsampledcw.y);
+ compMax = clamp(max(compMax, Upsampledcw.z), 0.0f, 1.0f);
+ float scale = preExposure / ((1.0f + 600.0f / 65504.0f) - compMax); // presumably undoes the tonemap applied by the convert pass - TODO confirm
+
+ if (ColorMax > 4000.0f) scale = ColorMax; // large ColorMax values read from the buffer are used directly as the scale
+ Upsampledcw.xyz = Upsampledcw.xyz * scale;
+ SceneColorOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = Upsampledcw; // w still holds the scaled kernel weight
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta
new file mode 100644
index 0000000..ff1bbc3
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_cs/sgsr2_upscale.compute.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: d7bacd7d04c6521499bef936d93921cc
+ComputeShaderImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta
new file mode 100644
index 0000000..7344082
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 097742e23f344d0408435f99f89e1edb
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader
new file mode 100644
index 0000000..7202ee0
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader
@@ -0,0 +1,49 @@
+Shader "TND/sgsr2_2pass_fs" // two fullscreen passes: "Convert" then "Upscale"
+{
+ SubShader
+ {
+ Cull Off ZWrite Off ZTest Always
+
+ Pass
+ {
+ Name "Convert"
+
+ HLSLPROGRAM
+ #pragma vertex vert_img
+ #pragma fragment frag_convert
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols (debugging only: it also turns shader optimizations off)
+
+ #include "../sgsr2_birp.hlsl"
+ #include "sgsr2_convert.hlsl"
+ // Forwards the interpolated UV to sgsr2_convert and outputs its float4 result to the single render target.
+ void frag_convert(v2f_img i, out float4 MotionDepthClipAlphaBuffer: SV_Target)
+ {
+ sgsr2_convert(i.uv, MotionDepthClipAlphaBuffer);
+ }
+
+ ENDHLSL
+ }
+
+ Pass
+ {
+ Name "Upscale"
+
+ HLSLPROGRAM
+ #pragma vertex vert_img
+ #pragma fragment frag_upscale
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols (debugging only: it also turns shader optimizations off)
+
+ #include "../sgsr2_birp.hlsl"
+ #include "sgsr2_upscale.hlsl"
+ // Forwards the interpolated UV to sgsr2_upscale and outputs the blended colour to the single render target.
+ void frag_upscale(v2f_img i, out half4 OutputColor: SV_Target)
+ {
+ sgsr2_upscale(i.uv, OutputColor);
+ }
+
+ ENDHLSL
+ }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta
new file mode 100644
index 0000000..6098da7
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_birp.shader.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 4451c8b25af942ccaa8d6d0bb46a8e60
+timeCreated: 1735392260
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl
new file mode 100644
index 0000000..a4b9e7b
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl
@@ -0,0 +1,89 @@
+#include "../sgsr2_common.hlsl"
+
+//============================================================================================================
+//
+//
+// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+//============================================================================================================
+
+// precision highp float;
+// precision highp int;
+
+// TODO: should use the SAMPLE_DEPTH_TEXTURE macros here?
+TYPED_TEXTURE2D_X(half, _CameraDepthTexture); // gathered by sgsr2_convert through the InputDepth alias
+TYPED_TEXTURE2D_X(half2, _CameraMotionVectorsTexture); // loaded by sgsr2_convert through the InputVelocity alias
+#define InputDepth _CameraDepthTexture
+#define InputVelocity _CameraMotionVectorsTexture
+
+void sgsr2_convert(const float2 texCoord, out float4 MotionDepthClipAlphaBuffer)
+{ // Convert pass for the pixel at texCoord: outputs (motion.xy, depthclip, 0).
+ uint2 InputPos = uint2(texCoord * renderSize); // texCoord is float so the UV -> pixel index conversion is not done at 16-bit precision
+ float2 gatherCoord = texCoord - 0.5f * renderSizeRcp;
+
+ // texture gather to find nearest depth
+ // a b c d
+ // e f g h
+ // i j k l
+ // m n o p
+ //btmLeft mnji
+ //btmRight oplk
+ //topLeft efba
+ //topRight ghdc
+ // Four gathers, offset by two texels from each other, cover the 4x4 neighbourhood drawn above.
+ float4 btmLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord);
+ float2 v10 = float2(renderSizeRcp.x * 2.0f, 0.0);
+ float4 btmRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v10));
+ float2 v12 = float2(0.0, renderSizeRcp.y * 2.0f);
+ float4 topLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v12));
+ float2 v14 = float2(renderSizeRcp.x * 2.0f, renderSizeRcp.y * 2.0f);
+ float4 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord+v14));
+ float maxC = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmLeft.z,btmRight.w),topLeft.y),topRight.x);
+ float btmLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmLeft.y,btmLeft.x),btmLeft.z),btmLeft.w);
+ float btmLeftMax9 = DEPTH_NEAREST(topLeft.x,DEPTH_NEAREST(DEPTH_NEAREST(maxC,btmLeft4),btmRight.x));
+ // depthclip stays 0 unless DEPTH_CLIP(maxC) holds (macro defined outside this file).
+ float depthclip = 0.0;
+ if (DEPTH_CLIP(maxC))
+ {
+ float btmRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(btmRight.y,btmRight.x),btmRight.z),btmRight.w);
+ float topLeft4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topLeft.y,topLeft.x),topLeft.z),topLeft.w);
+ float topRight4 = DEPTH_NEAREST(DEPTH_NEAREST(DEPTH_NEAREST(topRight.y,topRight.x),topRight.z),topRight.w);
+
+ float Wdepth = 0.0;
+ float Ksep = 1.37e-05f;
+ float Kfov = cameraFovAngleHor;
+ float diagonal_length = length(renderSize);
+ float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length;
+ // Each quadrant adds a [0,1] term that shrinks as its nearest depth moves away from maxC.
+ float Depthsep = Ksep_Kfov_diagonal * (1.0 - maxC);
+ Wdepth += clamp((Depthsep / (abs(maxC - btmLeft4) + EPSILON)), 0.0, 1.0);
+ Wdepth += clamp((Depthsep / (abs(maxC - btmRight4) + EPSILON)), 0.0, 1.0);
+ Wdepth += clamp((Depthsep / (abs(maxC - topLeft4) + EPSILON)), 0.0, 1.0);
+ Wdepth += clamp((Depthsep / (abs(maxC - topRight4) + EPSILON)), 0.0, 1.0);
+ depthclip = clamp(1.0f - Wdepth * 0.25, 0.0, 1.0);
+ }
+
+ //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion
+
+ float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, int2(InputPos));
+ // A non-zero stored velocity is decoded; otherwise motion is rebuilt by reprojecting with clipToPrevClip.
+ float2 motion;
+ if (any(abs(EncodedVelocity) > 0.0))
+ {
+ motion = decodeVelocityFromTexture(EncodedVelocity.xy);
+ }
+ else
+ {
+#ifdef REQUEST_NDC_Y_UP
+ float2 ScreenPos = float2(2.0f * texCoord.x - 1.0f, 1.0f - 2.0f * texCoord.y);
+#else
+ float2 ScreenPos = float2(2.0f * texCoord - 1.0f);
+#endif
+ float3 Position = float3(ScreenPos, btmLeftMax9); //this_clip
+ float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x)));
+ float2 PreScreen = PreClip.xy / PreClip.w;
+ motion = Position.xy - PreScreen;
+ }
+ MotionDepthClipAlphaBuffer = float4(motion, depthclip, 0.0);
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta
new file mode 100644
index 0000000..89cdb2c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_convert.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 3e8c4c408c337364291ae0e57dc25f28
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader
new file mode 100644
index 0000000..bf4ecc7
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader
@@ -0,0 +1,56 @@
+Shader "TND/PPV2/sgsr2_2pass_fs" // post-processing-stack variant: "Convert" then "Upscale"
+{
+ SubShader
+ {
+ Cull Off ZWrite Off ZTest Always
+
+ Pass
+ {
+ Name "Convert"
+
+ HLSLPROGRAM
+ #pragma vertex VertDefault
+ #pragma fragment FragConvert
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols (debugging only: it also turns shader optimizations off)
+ // NOTE(review): the define/undef below presumably keep UnityCG and StdLib's EPSILON from clashing with the SGSR2 headers - confirm.
+ #define UNITY_CG_INCLUDED
+ #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl"
+ #undef EPSILON
+ #include "../sgsr2_birp.hlsl"
+ #include "sgsr2_convert.hlsl"
+ // Forwards the interpolated UV to sgsr2_convert and outputs its float4 result to the single render target.
+ void FragConvert(VaryingsDefault i, out float4 MotionDepthClipAlphaBuffer: SV_Target)
+ {
+ sgsr2_convert(i.texcoord, MotionDepthClipAlphaBuffer);
+ }
+
+ ENDHLSL
+ }
+
+ Pass
+ {
+ Name "Upscale"
+
+ HLSLPROGRAM
+ #pragma vertex VertDefault
+ #pragma fragment FragUpscale
+ #pragma target 4.5
+ //#pragma enable_d3d11_debug_symbols (debugging only: it also turns shader optimizations off)
+ // NOTE(review): the define/undef below presumably keep UnityCG and StdLib's EPSILON from clashing with the SGSR2 headers - confirm.
+ #define UNITY_CG_INCLUDED
+ #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/StdLib.hlsl"
+ #undef EPSILON
+ #include "../sgsr2_birp.hlsl"
+ #include "sgsr2_upscale.hlsl"
+ // Writes the same upscaled colour to both render targets: SV_Target0 (output) and SV_Target1 (history).
+ void FragUpscale(VaryingsDefault i, out half4 OutputColor: SV_Target0, out half4 HistoryOutput: SV_Target1)
+ {
+ sgsr2_upscale(i.texcoord, OutputColor);
+ HistoryOutput = OutputColor;
+ }
+
+ ENDHLSL
+ }
+ }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta
new file mode 100644
index 0000000..bad8ad4
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_ppv2.shader.meta
@@ -0,0 +1,9 @@
+fileFormatVersion: 2
+guid: 59bc1035dd975f64d8141148a7088d0a
+ShaderImporter:
+ externalObjects: {}
+ defaultTextures: []
+ nonModifiableTextures: []
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl
new file mode 100644
index 0000000..9466d98
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl
@@ -0,0 +1,263 @@
+#include "../sgsr2_common.hlsl"
+
+//============================================================================================================
+//
+//
+// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+//============================================================================================================
+
+//precision mediump float;
+//precision highp int;
+
+TYPED_TEXTURE2D_X(half4, PrevOutput); // sampled at the reprojected UV as the history colour
+TYPED_TEXTURE2D_X(half4, MotionDepthClipAlphaBuffer); // xy = motion, z = depth factor (w is not read by sgsr2_upscale)
+TYPED_TEXTURE2D_X(half4, InputColor); // current frame, read with integer loads around InputPos
+
+// TODO: figure out the appropriate way of handling these SamplerStates in such a way that it all works with SRPs as well
+SamplerState samplerPrevOutput; // NOTE(review): sgsr2_upscale samples through S_LINEAR_CLAMP and does not name these three directly
+SamplerState samplerMotionDepthClipAlphaBuffer;
+SamplerState samplerInputColor;
+
+void sgsr2_upscale(const float2 texCoord, out half4 Output)
+{ // Upscale pass for the output pixel at texCoord: Output.xyz = history blended with the resampled current frame, Output.w = 0.
+ half Biasmax_viewportXScale = scaleRatio.x;
+ half scalefactor = scaleRatio.y;
+ // texCoord is float: it is scaled by renderSize below and needs more than 16-bit precision to stay pixel-accurate.
+ float2 Hruv = texCoord;
+
+ float2 Jitteruv;
+ Jitteruv.x = clamp(Hruv.x + (jitterOffset.x * renderSizeRcp.x), 0.0, 1.0);
+ Jitteruv.y = clamp(Hruv.y + (jitterOffset.y * renderSizeRcp.y), 0.0, 1.0);
+
+ int2 InputPos = int2(Jitteruv * renderSize);
+
+ float3 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0.0).xyz;
+ float2 Motion = mda.xy;
+
+ float2 PrevUV;
+ PrevUV.x = clamp(-0.5 * Motion.x + Hruv.x, 0.0, 1.0);
+#ifdef REQUEST_NDC_Y_UP
+ PrevUV.y = clamp(0.5 * Motion.y + Hruv.y, 0.0, 1.0);
+#else
+ PrevUV.y = clamp(-0.5 * Motion.y + Hruv.y, 0.0, 1.0);
+#endif
+
+ half depthfactor = mda.z;
+
+ half3 HistoryColor = SAMPLE_TEXTURE2D_X_LOD(PrevOutput, S_LINEAR_CLAMP, PrevUV, 0.0).xyz;
+
+ /////upsample and compute box
+ half4 Upsampledcw = 0.0f; // xyz accumulates weight * colour, w accumulates the weights
+ half biasmax = Biasmax_viewportXScale ;
+ half biasmin = max(1.0f, 0.3 + 0.3 * biasmax);
+ half biasfactor = 0.25f * depthfactor;
+ half kernelbias = lerp(biasmax, biasmin, biasfactor);
+ half motion_viewport_len = length(Motion * displaySize);
+ half curvebias = lerp(-2.0, -3.0, clamp(motion_viewport_len * 0.02, 0.0, 1.0));
+
+ half3 rectboxcenter = 0.0f;
+ half3 rectboxvar = 0.0f;
+ half rectboxweight = 0.0;
+ float2 srcpos = float2(InputPos) + 0.5f - jitterOffset; // pixel index converted at float precision; only the small difference below is narrowed to half
+
+ kernelbias *= 0.5f;
+ half kernelbias2 = kernelbias * kernelbias;
+ half2 srcpos_srcOutputPos = srcpos - Hruv * renderSize; //srcOutputPos = Hruv * params.renderSize;
+ half3 rectboxmin;
+ half3 rectboxmax;
+ half3 topMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, 1)).xyz;
+ {
+ // tap offset (0, +1): also seeds rectboxmin / rectboxmax
+ half3 samplecolor = topMid;
+ half2 baseoffset = srcpos_srcOutputPos + half2(0.0, 1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = samplecolor;
+ rectboxmax = samplecolor;
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ half3 rightMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, 0)).xyz;
+ {
+ // tap offset (+1, 0)
+ half3 samplecolor = rightMid;
+ half2 baseoffset = srcpos_srcOutputPos + half2(1.0, 0.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ half3 leftMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, 0)).xyz;
+ {
+ // tap offset (-1, 0)
+ half3 samplecolor = leftMid;
+ half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, 0.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ half3 centerMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, 0)).xyz;
+ {
+ // centre tap, offset (0, 0)
+ half3 samplecolor = centerMid;
+ half2 baseoffset = srcpos_srcOutputPos;
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ half3 btmMid = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(0, -1)).xyz;
+ {
+ // tap offset (0, -1)
+ half3 samplecolor = btmMid;
+ half2 baseoffset = srcpos_srcOutputPos + half2(0.0, -1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+
+ //if (params.sameCameraFrmNum!=0u) //maybe disable this for ultra performance
+ if (false) //maybe disable this for ultra performance, true could generate more realistic output
+ { // the four diagonal taps below are compiled out by the constant condition above
+ {
+ half3 topRight = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, 1)).xyz;
+ half3 samplecolor = topRight;
+ half2 baseoffset = srcpos_srcOutputPos + half2(1.0, 1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0, 1.0);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ {
+ half3 topLeft = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, 1)).xyz;
+ half3 samplecolor = topLeft;
+ half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, 1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ {
+ half3 btmRight = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(1, -1)).xyz;
+ half3 samplecolor = btmRight;
+ half2 baseoffset = srcpos_srcOutputPos + half2(1.0, -1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+
+ {
+ half3 btmLeft = LOAD_TEXTURE2D_X(InputColor, InputPos + int2(-1, -1)).xyz;
+ half3 samplecolor = btmLeft;
+ half2 baseoffset = srcpos_srcOutputPos + half2(-1.0, -1.0);
+ half baseoffset_dot = dot(baseoffset, baseoffset);
+ half base = clamp(baseoffset_dot * kernelbias2, 0.0f, 1.0f);
+ half weight = FastLanczos(base);
+ Upsampledcw += half4(samplecolor * weight, weight);
+ half boxweight = exp(baseoffset_dot * curvebias);
+ rectboxmin = min(rectboxmin, samplecolor);
+ rectboxmax = max(rectboxmax, samplecolor);
+ half3 wsample = samplecolor * boxweight;
+ rectboxcenter += wsample;
+ rectboxvar += (samplecolor * wsample);
+ rectboxweight += boxweight;
+ }
+ }
+
+ rectboxweight = 1.0 / rectboxweight; // normalise the box sums by the accumulated box weight
+ rectboxcenter *= rectboxweight;
+ rectboxvar *= rectboxweight;
+ rectboxvar = sqrt(abs(rectboxvar - rectboxcenter * rectboxcenter)); // sqrt(|E[c^2] - E[c]^2|) per channel
+
+ Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.075f, rectboxmax+0.075f); // weighted average, limited to the tap min/max +/- 0.075
+ Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ;
+
+ half baseupdate = 1.0f - depthfactor;
+ baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0)));
+ baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0)));
+ half basealpha = baseupdate;
+
+ half boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0));
+ half boxsize = lerp(scalefactor, 1.0f, boxscale);
+ half3 sboxvar = rectboxvar * boxsize;
+ half3 boxmin = rectboxcenter - sboxvar;
+ half3 boxmax = rectboxcenter + sboxvar;
+ rectboxmax = min(rectboxmax, boxmax); // intersect the min/max box with the centre +/- scaled deviation box
+ rectboxmin = max(rectboxmin, boxmin);
+
+ half3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax);
+ half startLerpValue = minLerpContribution;
+ if ((abs(mda.x) + abs(mda.y)) > 0.000001) startLerpValue = 0.0; // any motion: out-of-box history is fully replaced by its clamped value
+ half lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? startLerpValue : 1.0f;
+
+ HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0));
+ half basemin = min(basealpha, 0.1f);
+ basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0));
+
+ ////blend color
+ half alphasum = max(EPSILON, basealpha + Upsampledcw.w);
+ half alpha = clamp(Upsampledcw.w / alphasum + reset, 0.0, 1.0); // adding reset pushes alpha towards 1, i.e. towards the current-frame sample
+
+ Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha);
+
+ Output = half4(Upsampledcw.xyz, 0.0);
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta
new file mode 100644
index 0000000..66520fd
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/2_pass_fs/sgsr2_upscale.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: b3f52eb20bad6124e8835caaa5938444
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta
new file mode 100644
index 0000000..c7f1f9e
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 30c2581cbd096d349bf552c4b0886d84
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute
similarity index 58%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute
index 5185310..d925608 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_activate.hlsl
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute
@@ -1,3 +1,7 @@
+#pragma kernel CS
+#include "../sgsr2_birp.hlsl"
+#include "../sgsr2_common.hlsl"
+
//============================================================================================================
//
//
@@ -6,51 +10,17 @@
//
//============================================================================================================
-#define EPSILON 1.19e-07f
-float DecodeColorY(uint sample32)
-{
- uint x11 = sample32 >> 21u;
- return float(x11) * (1.0 / 2047.5);
-}
-
-uint packHalf2x16(float2 value)
-{
- return f32tof16(value.x) | (f32tof16(value.y) << 16);
-}
-
-float2 unpackHalf2x16(uint x)
-{
- return f16tof32(uint2(x & 0xFFFF, x >> 16));
-}
-
-Texture2D PrevLumaHistory : register(t0);
-Texture2D MotionDepthAlphaBuffer : register(t1);
-Texture2D YCoCgColor : register(t2);
-RWTexture2D MotionDepthClipAlphaBuffer : register(u0);
-RWTexture2D LumaHistory : register(u1);
-
-cbuffer Params : register(b0)
-{
- uint2 renderSize;
- uint2 displaySize;
- float2 ViewportSizeInverse;
- float2 displaySizeRcp;
- float2 jitterOffset;
- float4 clipToPrevClip[4];
- float preExposure;
- float cameraFovAngleHor;
- float cameraNear;
- float MinLerpContribution;
- uint bSameCamera;
- uint reset;
-};
-
-SamplerState s_PointClamp : register(s0);
-SamplerState s_LinearClamp : register(s1);
+TYPED_TEXTURE2D_X(uint, PrevLumaHistory) : register(t0);
+TEXTURE2D_X(MotionDepthAlphaBuffer) : register(t1);
+TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2);
+RW_TEXTURE2D_X(float4, MotionDepthClipAlphaBuffer) : register(u0);
+RW_TEXTURE2D_X(uint, LumaHistory) : register(u1);
[numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
+void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID)
{
+ UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z);
+
int2 sampleOffset[4] = {
int2(-1, -1),
int2(-1, +0),
@@ -58,17 +28,17 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
int2(+0, +0)
};
- uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId;
+ uint2 InputPos = gl_GlobalInvocationID.xy;
- float2 ViewportUV = (float2(InputPos) + 0.5f) * ViewportSizeInverse;
- float2 gatherCoord = ViewportUV + 0.5f * ViewportSizeInverse;
- uint luma_reference32 = YCoCgColor.GatherRed(s_PointClamp, gatherCoord).w;
+ float2 ViewportUV = (float2(gl_GlobalInvocationID.xy) + 0.5f) * renderSizeRcp;
+ float2 gatherCoord = ViewportUV + 0.5f * renderSizeRcp;
+ uint luma_reference32 = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord).w;
float luma_reference = DecodeColorY(luma_reference32);
- float4 mda = MotionDepthAlphaBuffer[InputPos].xyzw; //motion depth alpha
- float depth = mda.z;
- float alphamask = mda.w;
- float2 motion = mda.xy;
+ float4 mda = LOAD_TEXTURE2D_X(MotionDepthAlphaBuffer, gl_GlobalInvocationID.xy).xyzw; //motion depth alpha
+ float depth = mda.z;
+ float alphamask = mda.w;
+ float2 motion = mda.xy;
#ifdef REQUEST_NDC_Y_UP
float2 PrevUV = float2(-0.5f * motion.x + ViewportUV.x, 0.5f * motion.y + ViewportUV.y);
@@ -77,7 +47,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
#endif
float depthclip = 0.0;
- if (depth > 1.0e-05f) {
+ if (DEPTH_CLIP(depth)) {
float2 Prevf_sample = PrevUV * float2(renderSize) - 0.5f;
float2 Prevfrac = Prevf_sample - floor(Prevf_sample);
float OneMinusPrevfacx = 1.0 - Prevfrac.x;
@@ -96,7 +66,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float Kfov = cameraFovAngleHor;
float Ksep_Kfov_diagonal = Ksep * Kfov * diagonal_length;
for (int index = 0; index < 4; index+=2){
- float4 gPrevdepth = MotionDepthAlphaBuffer.GatherBlue(s_PointClamp, PrevUV, sampleOffset[index]);
+ float4 gPrevdepth = GATHER_BLUE_TEXTURE2D_X_OFFSET(MotionDepthAlphaBuffer, S_POINT_CLAMP, PrevUV, sampleOffset[index]);
float tdepth1 = min(gPrevdepth.x, gPrevdepth.y);
float tdepth2 = min(gPrevdepth.z, gPrevdepth.w);
float fPrevdepth = min(tdepth1, tdepth2);
@@ -105,7 +75,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float weight = Bilinweights[index];
Wdepth += clamp(Depthsep / (abs(fPrevdepth - depth) + EPSILON), 0.0, 1.0) * weight;
- float2 gPrevdepth2 = MotionDepthAlphaBuffer.GatherBlue(s_PointClamp, PrevUV, sampleOffset[index + int(1)]).zw;
+ float2 gPrevdepth2 = GATHER_BLUE_TEXTURE2D_X_OFFSET(MotionDepthAlphaBuffer, S_POINT_CLAMP, PrevUV, sampleOffset[index + int(1)]).zw;
fPrevdepth = min(min(gPrevdepth2.x, gPrevdepth2.y), tdepth2);
Depthsep = Ksep_Kfov_diagonal * (1.0 - min(fPrevdepth, depth));
weight = Bilinweights[index + int(1)];
@@ -115,7 +85,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
}
float2 current_luma_diff;
- uint prev_luma_diff_pack = PrevLumaHistory.GatherRed(s_PointClamp, PrevUV).w;
+ uint prev_luma_diff_pack = GATHER_RED_TEXTURE2D_X(PrevLumaHistory, S_POINT_CLAMP, PrevUV).w;
float2 prev_luma_diff;
prev_luma_diff.x = unpackHalf2x16(prev_luma_diff_pack >> 16u).x;
prev_luma_diff.y = unpackHalf2x16((prev_luma_diff_pack & uint(0xFFFF))).x;
@@ -136,6 +106,6 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
}
alphamask = floor(alphamask) + 0.5f * float((current_luma_diff.x != 0.0f) && (abs(current_luma_diff.y) != abs(luma_diff)));
- LumaHistory[InputPos] = (packHalf2x16(float2(current_luma_diff.x, 0.0)) << 16u) | packHalf2x16(float2(current_luma_diff.y, 0.0));
- MotionDepthClipAlphaBuffer[InputPos] = float4(motion, depthclip, alphamask);
+ LumaHistory[COORD_TEXTURE2D_X(InputPos)] = (packHalf2x16(float2(current_luma_diff.x, 0.0)) << 16u) | packHalf2x16(float2(current_luma_diff.y, 0.0));
+ MotionDepthClipAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, depthclip, alphamask);
}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute.meta
similarity index 100%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_activate.compute.meta
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute
new file mode 100644
index 0000000..e1cd580
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute
@@ -0,0 +1,104 @@
+#pragma kernel CS
+#include "../sgsr2_birp.hlsl"
+#include "../sgsr2_common.hlsl"
+
+//============================================================================================================
+//
+//
+// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+// SPDX-License-Identifier: BSD-3-Clause
+//
+//============================================================================================================
+
+TEXTURE2D_X(InputOpaqueColor) : register(t0); // in: colour that is tonemapped and diffed against InputColor to build the alpha mask
+TEXTURE2D_X(InputColor) : register(t1); // in: colour that is tonemapped and packed into YCoCgColor
+TYPED_TEXTURE2D_X(float, InputDepth) : register(t2); // in: depth; the kernel keeps the nearest value found around each pixel
+TYPED_TEXTURE2D_X(float2, InputVelocity) : register(t3); // in: encoded motion vectors; an all-zero texel triggers the reprojection fallback
+RW_TEXTURE2D_X(float4, MotionDepthAlphaBuffer) : register(u0); // out: xy = motion, z = nearest depth, w = alpha mask
+RW_TEXTURE2D_X(uint, YCoCgColor) : register(u1); // out: Y/Co/Cg packed as 11/11/10 bits
+
+[numthreads(8, 8, 1)]
+void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID) // SGSR2 convert pass: one thread per input pixel
+{
+    UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z);
+    // Outputs per pixel: (motion, nearest depth, alpha mask) and the tonemapped colour packed as YCoCg.
+    half h0 = preExposure;
+    uint2 InputPos = gl_GlobalInvocationID.xy;
+    // gatherCoord is the pixel index scaled to UV (the texel's corner); ViewportUV is half a texel further (its centre).
+    float2 gatherCoord = float2(gl_GlobalInvocationID.xy) * renderSizeRcp;
+    float2 ViewportUV = gatherCoord + 0.5f * renderSizeRcp;
+
+    //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
+    //FindNearestDepth
+    // The direct load is the only unclamped depth access, so clamp it like the S_POINT_CLAMP gathers (assumes InputDepth is renderSize texels - TODO confirm); unclamped it reads outside the texture on the last row/column.
+    int2 InputPosBtmRight = min(int2(1, 1) + int2(InputPos), int2(renderSize) - 1);
+    float NearestZ = LOAD_TEXTURE2D_X(InputDepth, InputPosBtmRight).x;
+
+    float4 topleft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, gatherCoord);
+
+    NearestZ = DEPTH_NEAREST(topleft.x, NearestZ);
+    NearestZ = DEPTH_NEAREST(topleft.y, NearestZ);
+    NearestZ = DEPTH_NEAREST(topleft.z, NearestZ);
+    NearestZ = DEPTH_NEAREST(topleft.w, NearestZ);
+
+    float2 v11 = float2(renderSizeRcp.x, 0.0); // one texel to the right
+    float2 topRight = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v11)).yz;
+
+    NearestZ = DEPTH_NEAREST(topRight.x, NearestZ);
+    NearestZ = DEPTH_NEAREST(topRight.y, NearestZ);
+
+    float2 v13 = float2(0.0, renderSizeRcp.y); // one texel down
+    float2 bottomLeft = GATHER_RED_TEXTURE2D_X(InputDepth, S_POINT_CLAMP, (gatherCoord + v13)).xy;
+
+    NearestZ = DEPTH_NEAREST(bottomLeft.x, NearestZ);
+    NearestZ = DEPTH_NEAREST(bottomLeft.y, NearestZ);
+
+    //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion
+
+    float2 EncodedVelocity = LOAD_TEXTURE2D_X(InputVelocity, InputPos);
+
+    float2 motion;
+    if (any(abs(EncodedVelocity) > 0.0)) // any non-zero component: use the stored motion vector
+    {
+        motion = decodeVelocityFromTexture(EncodedVelocity.xy);
+    }
+    else // exactly zero: derive motion by reprojecting this pixel into the previous frame
+    {
+#ifdef REQUEST_NDC_Y_UP
+        float2 ScreenPos = float2(2.0f * ViewportUV.x - 1.0f, 1.0f - 2.0f * ViewportUV.y);
+#else
+        float2 ScreenPos = float2(2.0f * ViewportUV - 1.0f);
+#endif
+        float3 Position = float3(ScreenPos, NearestZ); //this_clip
+        float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x)));
+        float2 PreScreen = PreClip.xy / PreClip.w; // perspective divide
+        motion = Position.xy - PreScreen;
+    }
+
+    ////////////compute luma
+    half3 Colorrgb = LOAD_TEXTURE2D_X(InputColor, InputPos).xyz;
+
+    ///simple tonemap: divide by (largest channel + preExposure)
+    Colorrgb /= max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + h0;
+
+    float3 Colorycocg;
+    Colorycocg.x = 0.25 * (Colorrgb.x + 2.0 * Colorrgb.y + Colorrgb.z);
+    Colorycocg.y = clamp(0.5 * Colorrgb.x + 0.5 - 0.5 * Colorrgb.z, 0.0, 1.0);
+    Colorycocg.z = clamp(Colorycocg.x + Colorycocg.y - Colorrgb.x, 0.0, 1.0);
+
+    // Y, Co and Cg are now all in the range [0,1]; quantise them to 11, 11 and 10 bits
+    uint x11 = uint(Colorycocg.x * 2047.5);
+    uint y11 = uint(Colorycocg.y * 2047.5);
+    uint z10 = uint(Colorycocg.z * 1023.5);
+
+    half3 Colorprergb = LOAD_TEXTURE2D_X(InputOpaqueColor, InputPos).xyz;
+
+    ///simple tonemap (same as above) so the two colours are comparable
+    Colorprergb /= max(max(Colorprergb.x, Colorprergb.y), Colorprergb.z) + h0;
+    half3 delta = abs(Colorrgb - Colorprergb);
+    half alpha_mask = max(delta.x, max(delta.y, delta.z)); // largest per-channel difference
+    alpha_mask = (0.35f * 1000.0f) * alpha_mask;
+
+    YCoCgColor[COORD_TEXTURE2D_X(InputPos)] = ((x11 << 21u) | (y11 << 10u)) | z10; // Y in bits 31..21, Co in 20..10, Cg in 9..0
+    MotionDepthAlphaBuffer[COORD_TEXTURE2D_X(InputPos)] = float4(motion, NearestZ, alpha_mask);
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute.meta
similarity index 100%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_convert.compute.meta
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute
similarity index 78%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute
index acadb9d..59b24e5 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_upscale.hlsl
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute
@@ -1,3 +1,8 @@
+#pragma kernel CS
+//#pragma enable_d3d11_debug_symbols
+#include "../sgsr2_birp.hlsl"
+#include "../sgsr2_common.hlsl"
+
//============================================================================================================
//
//
@@ -6,56 +11,16 @@
//
//============================================================================================================
-float FastLanczos(float base)
-{
- float y = base - 1.0f;
- float y2 = y * y;
- float y_temp = 0.75f * y + y2;
- return y_temp * y2;
-}
-
-float3 DecodeColor(uint sample32)
-{
- uint x11 = sample32 >> 21u;
- uint y11 = sample32 & (2047u << 10u);
- uint z10 = sample32 & 1023u;
- float3 samplecolor;
- samplecolor.x = (float(x11) * (1.0 / 2047.5));
- samplecolor.y = (float(y11) * (4.76953602e-7)) - 0.5;
- samplecolor.z = (float(z10) * (1.0 / 1023.5)) - 0.5;
-
- return samplecolor;
-}
-
-Texture2D PrevHistoryOutput : register(t0);
-Texture2D MotionDepthClipAlphaBuffer : register(t1);
-Texture2D YCoCgColor : register(t2);
-RWTexture2D SceneColorOutput : register(u0);
-RWTexture2D HistoryOutput : register(u1);
-
-cbuffer Params : register(b0)
-{
- uint2 renderSize;
- uint2 displaySize;
- float2 renderSizeRcp;
- float2 displaySizeRcp;
- float2 jitterOffset;
- float4 clipToPrevClip[4];
- float preExposure;
- float cameraFovAngleHor;
- float cameraNear;
- float MinLerpContribution;
- uint bSameCamera;
- uint reset;
-};
-
-SamplerState s_PointClamp : register(s0);
-SamplerState s_LinearClamp : register(s1);
+TEXTURE2D_X(PrevHistoryOutput) : register(t0);
+TEXTURE2D_X(MotionDepthClipAlphaBuffer) : register(t1);
+TYPED_TEXTURE2D_X(uint, YCoCgColor) : register(t2);
+RW_TEXTURE2D_X(float4, SceneColorOutput) : register(u0);
+RW_TEXTURE2D_X(float4, HistoryOutput) : register(u1);
[numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
+void CS(uint3 gl_GlobalInvocationID : SV_DispatchThreadID)
{
- uint2 InvocationID = GroupId * uint2(8, 8) + GroupThreadId;
+ UNITY_XR_ASSIGN_VIEW_INDEX(gl_GlobalInvocationID.z);
float Biasmax_viewportXScale = min(float(displaySize.x) / float(renderSize.x), 1.99); //Biasmax_viewportXScale
float scalefactor = min(20.0, pow((float(displaySize.x) / float(renderSize.x)) * (float(displaySize.y) / float(renderSize.y)), 3.0));
@@ -64,16 +29,16 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float2 HistoryInfoViewportSize = float2(displaySize);
float2 InputJitter = jitterOffset;
float2 InputInfoViewportSize = float2(renderSize);
- float2 Hruv = (float2(InvocationID) + 0.5f) * HistoryInfoViewportSizeInverse;
+ float2 Hruv = (float2(gl_GlobalInvocationID.xy) + 0.5f) * HistoryInfoViewportSizeInverse;
float2 Jitteruv;
- Jitteruv.x = clamp(Hruv.x + (InputJitter.x * HistoryInfoViewportSizeInverse.x), 0.0, 1.0);
- Jitteruv.y = clamp(Hruv.y + (InputJitter.y * HistoryInfoViewportSizeInverse.y), 0.0, 1.0);
+ Jitteruv.x = clamp(Hruv.x + (InputJitter.x * renderSizeRcp.x), 0.0, 1.0);
+ Jitteruv.y = clamp(Hruv.y + (InputJitter.y * renderSizeRcp.y), 0.0, 1.0);
int2 InputPos = int2(Jitteruv * InputInfoViewportSize);
//float2 Motion = texelFetch(MotionDepthClipAlphaBuffer, InputPos, 0).xy;
- float alphab = MotionDepthClipAlphaBuffer[InputPos].w;
- float3 mda = MotionDepthClipAlphaBuffer.SampleLevel(s_LinearClamp, Jitteruv, 0).xyz;
+ float alphab = LOAD_TEXTURE2D_X(MotionDepthClipAlphaBuffer, InputPos).w;
+ float3 mda = SAMPLE_TEXTURE2D_X_LOD(MotionDepthClipAlphaBuffer, S_LINEAR_CLAMP, Jitteruv, 0).xyz;
float2 Motion = mda.xy;
///ScreenPosToViewportScale&Bias
@@ -90,7 +55,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float alphamask = (alphab - history_value) * 0.001f;
history_value *= 2.0;
- float4 History = PrevHistoryOutput.SampleLevel(s_LinearClamp, PrevUV, 0);
+ float4 History = SAMPLE_TEXTURE2D_X_LOD(PrevHistoryOutput, S_LINEAR_CLAMP, PrevUV, 0);
float3 HistoryColor = History.xyz;
float Historyw = History.w;
float Wfactor = max(clamp(abs(Historyw), 0.0, 1.0), alphamask);
@@ -117,10 +82,10 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
int2 InputPosBtmRight = 1 + InputPos;
float2 gatherCoord = float2(InputPos) * renderSizeRcp;
- uint btmRight = YCoCgColor[InputPosBtmRight].x;
- uint4 topleft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord);
- uint2 topRight = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz;
- uint2 bottomLeft = YCoCgColor.GatherRed(s_PointClamp, gatherCoord + float2(0.0, renderSizeRcp.y)).xy;
+ uint btmRight = LOAD_TEXTURE2D_X(YCoCgColor, InputPosBtmRight).x;
+ uint4 topleft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord);
+ uint2 topRight = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(renderSizeRcp.x, 0.0)).yz;
+ uint2 bottomLeft = GATHER_RED_TEXTURE2D_X(YCoCgColor, S_POINT_CLAMP, gatherCoord + float2(0.0, renderSizeRcp.y)).xy;
float3 rectboxmin;
float3 rectboxmax;
@@ -267,16 +232,15 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
Upsampledcw.xyz = clamp(Upsampledcw.xyz / Upsampledcw.w, rectboxmin-0.05f, rectboxmax+0.05f);
Upsampledcw.w = Upsampledcw.w * (1.0f / 3.0f) ;
- float tcontribute = history_value * clamp(rectboxvar.x * 10.0f, 0.0, 1.0);
- float OneMinusWfactor = 1.0f - Wfactor;
- tcontribute = tcontribute * OneMinusWfactor;
+ float tcontribute = history_value * clamp(rectboxvar.x * 10.0f, 0.0, 1.0);
+ float OneMinusWfactor = 1.0f - Wfactor;
+ tcontribute = tcontribute * OneMinusWfactor;
float baseupdate = OneMinusWfactor - OneMinusWfactor * depthfactor;
baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w *10.0f, clamp(10.0f* motion_viewport_len, 0.0, 1.0)));
baseupdate = min(baseupdate, lerp(baseupdate, Upsampledcw.w, clamp(motion_viewport_len *0.05f, 0.0, 1.0)));
float basealpha = baseupdate;
- const float EPSILON = 1.192e-07f;
float boxscale = max(depthfactor, clamp(motion_viewport_len * 0.05f, 0.0, 1.0));
float boxsize = lerp(scalefactor, 1.0f, boxscale);
float3 sboxvar = rectboxvar * boxsize;
@@ -286,9 +250,9 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
rectboxmin = max(rectboxmin, boxmin);
float3 clampedcolor = clamp(HistoryColor, rectboxmin, rectboxmax);
- float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? tcontribute : 1.0f;
- lerpcontribution = lerpcontribution - lerpcontribution * sqrt(alphamask);
- HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0));
+ float lerpcontribution = (any(rectboxmin > HistoryColor) || any(HistoryColor > rectboxmax)) ? tcontribute : 1.0f;
+ lerpcontribution = lerpcontribution - lerpcontribution * sqrt(alphamask);
+ HistoryColor = lerp(clampedcolor, HistoryColor, clamp(lerpcontribution, 0.0, 1.0));
float basemin = min(basealpha, 0.1f);
basealpha = lerp(basemin, basealpha, clamp(lerpcontribution, 0.0, 1.0));
@@ -297,7 +261,7 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float alpha = clamp(Upsampledcw.w / alphasum + float(reset), 0.0, 1.0);
Upsampledcw.xyz = lerp(HistoryColor, Upsampledcw.xyz, alpha);
- HistoryOutput[InvocationID.xy] = float4(Upsampledcw.xyz, Wfactor);
+ HistoryOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = float4(Upsampledcw.xyz, Wfactor);
////ycocg to grb
float x_z = Upsampledcw.x - Upsampledcw.z;
@@ -311,5 +275,5 @@ void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
float scale = preExposure / ((1.0f + 1.0f / 65504.0f) - compMax); //(1.0f + 1.0f / 65504.0f) = 1.000015e+00
Upsampledcw.xyz = Upsampledcw.xyz * scale;
- SceneColorOutput[InvocationID.xy] = Upsampledcw;
+ SceneColorOutput[COORD_TEXTURE2D_X(gl_GlobalInvocationID.xy)] = Upsampledcw;
}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute.meta
similarity index 100%
rename from Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute.meta
rename to Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/3_pass_cs/sgsr2_upscale.compute.meta
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute
deleted file mode 100644
index 76a9d8a..0000000
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_activate.compute
+++ /dev/null
@@ -1,3 +0,0 @@
-#pragma kernel CS
-
-#include "shaders/sgsr2_activate.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl
new file mode 100644
index 0000000..be757a0
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl
@@ -0,0 +1,29 @@
+#include "UnityCG.cginc"
+
+#define TEXTURE2D_X(textureName) Texture2D textureName // untyped texture declaration (HLSL default element type)
+#define TYPED_TEXTURE2D_X(type, textureName) Texture2D<type> textureName // forwards the element type so uint / float / float2 resources are declared as such
+#define RW_TEXTURE2D_X(type, textureName) RWTexture2D<type> textureName // writable (UAV) texture with an explicit element type
+
+#define COORD_TEXTURE2D_X(pixelCoord) pixelCoord // pass-through: this file's textures are indexed directly by the 2D pixel coordinate
+
+#define LOAD_TEXTURE2D_X(textureName, unCoord2) textureName[unCoord2] // unfiltered read at an integer pixel coordinate
+#define SAMPLE_TEXTURE2D_X_LOD(textureName, samplerName, coord2, lod) textureName.SampleLevel(samplerName, coord2, lod)
+#define GATHER_RED_TEXTURE2D_X(textureName, samplerName, coord2) textureName.GatherRed(samplerName, coord2)
+#define GATHER_BLUE_TEXTURE2D_X_OFFSET(textureName, samplerName, coord2, offset) textureName.GatherBlue(samplerName, coord2, offset) // offset is handed to GatherBlue as its texel offset
+
+SamplerState s_PointClamp : register(s0); // NOTE(review): filter/address modes are presumably derived from the name or bound by the host - confirm
+SamplerState s_LinearClamp : register(s1);
+
+#define S_POINT_CLAMP s_PointClamp // sampler aliases used by the shared kernels
+#define S_LINEAR_CLAMP s_LinearClamp
+
+#define UNITY_XR_ASSIGN_VIEW_INDEX(viewIndex) // expands to nothing in this file, so the kernels' view-index call is a no-op
+
+inline float2 decodeVelocityFromTexture(float2 ev) // doubles a stored velocity; Y is negated when UNITY_UV_STARTS_AT_TOP is set
+{
+#if UNITY_UV_STARTS_AT_TOP
+    return 2.0f * float2(ev.x, -ev.y); // presumably matches the Y-flipped screen-space convention used by the kernels - confirm
+#else
+    return 2.0f * ev;
+#endif
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta
new file mode 100644
index 0000000..c250b46
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_birp.hlsl.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: f3edae28a3f74031996d08ca5a87c28e
+timeCreated: 1734775340
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl
new file mode 100644
index 0000000..2fb1bf4
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl
@@ -0,0 +1,68 @@
+#define EPSILON 1.192e-07f // tiny positive value (about FLT_EPSILON) used to keep denominators away from zero
+
+#if UNITY_UV_STARTS_AT_TOP // when set, the kernels take their REQUEST_NDC_Y_UP code paths
+#define REQUEST_NDC_Y_UP
+#endif
+
+#if UNITY_REVERSED_Z // DEPTH_NEAREST picks the larger value here, the smaller one otherwise
+#define DEPTH_NEAREST(a, b) max((a), (b))
+#define DEPTH_CLIP(depth) ((depth) > 1.0e-05f) // NOTE(review): presumably "not on the far plane" - confirm
+#else
+#define DEPTH_NEAREST(a, b) min((a), (b))
+#define DEPTH_CLIP(depth) ((depth) < 1.0f - 1.0e-05f)
+#endif
+
+cbuffer cbSGSR2 : register(b0) // constants read by the SGSR2 kernels that include this file
+{
+ uint2 renderSize; // input size in pixels (the upscale kernel uses it as the input viewport size)
+ uint2 displaySize; // output size in pixels (the upscale kernel uses it as the history viewport size)
+ float2 renderSizeRcp; // scales an input pixel index to UV; presumably 1 / renderSize - set by the host, confirm
+ float2 displaySizeRcp;
+ float2 jitterOffset; // the upscale kernel adds jitterOffset * renderSizeRcp to the output UV
+ float2 padding1; // NOTE(review): presumably keeps clipToPrevClip on a 16-byte boundary to match the CPU-side layout - confirm
+ float4 clipToPrevClip[4]; // combined as [3] + [2]*z + [1]*y + [0]*x to reproject a position into the previous frame
+ float preExposure; // added to the max channel in the tonemap divisor; numerator of the inverse scale on output
+ float cameraFovAngleHor; // feeds the depth-separation factor in the activate kernel
+ float cameraNear;
+ float minLerpContribution; // starting history lerp value in the 2-pass upscale shader
+ float2 scaleRatio;
+ uint bSameCamera;
+ uint reset; // added to the blend alpha before clamping, so a non-zero value forces alpha to 1
+};
+
+float FastLanczos(float base) // judging by the name, a cheap polynomial stand-in for a Lanczos weight
+{
+    // With y = base - 1 the result is (0.75*y + y*y) * (y*y).
+    float shifted = base - 1.0f;
+    float shiftedSq = shifted * shifted;
+    return (0.75f * shifted + shiftedSq) * shiftedSq;
+}
+
+float3 DecodeColor(uint sample32) // unpacks an 11/11/10-bit word into (Y, Co - 0.5, Cg - 0.5)
+{
+    // Top 11 bits hold Y, the middle 11 hold Co, the low 10 hold Cg.
+    uint lumaBits = sample32 >> 21u;
+    uint coBits = sample32 & (2047u << 10u); // left unshifted; the scale constant below absorbs the factor of 1024
+    uint cgBits = sample32 & 1023u;
+    float lumaY = (float(lumaBits) * (1.0 / 2047.5));
+    float chromaCo = (float(coBits) * (4.76953602e-7)) - 0.5;
+    float chromaCg = (float(cgBits) * (1.0 / 1023.5)) - 0.5;
+
+    return float3(lumaY, chromaCo, chromaCg);
+}
+
+float DecodeColorY(uint sample32) // luma-only decode of a packed colour word
+{
+    // Y occupies the top 11 bits; the lower 21 bits are ignored.
+    return float(sample32 >> 21u) * (1.0 / 2047.5);
+}
+
+uint packHalf2x16(float2 value) // packs two floats as half-precision bit patterns into one uint
+{
+    return (f32tof16(value.y) << 16) | f32tof16(value.x); // y in the high 16 bits, x in the low 16 bits
+}
+
+float2 unpackHalf2x16(uint x) // inverse of packHalf2x16: low 16 bits give .x, high 16 bits give .y
+{
+    return float2(f16tof32(x & 0xFFFF), f16tof32(x >> 16));
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta
new file mode 100644
index 0000000..d3bb9ed
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_common.hlsl.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: bff676b8468748078a48f9d10bb7eabd
+timeCreated: 1734795662
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute
deleted file mode 100644
index beb2024..0000000
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_convert.compute
+++ /dev/null
@@ -1,3 +0,0 @@
-#pragma kernel CS
-
-#include "shaders/sgsr2_convert.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl
new file mode 100644
index 0000000..6133399
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl
@@ -0,0 +1,17 @@
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
+#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
+
+// Emulates a gather with texel offset by shifting the UV by offset * renderSizeRcp. Using renderSizeRcp here is a bit of a hack, but the SRPs don't offer any macros for gather with offset, and we know which texture the GatherBlue will be used for. Arguments are parenthesised so compound expressions keep their precedence.
+#define GATHER_BLUE_TEXTURE2D_X_OFFSET(textureName, samplerName, coord2, offset) GATHER_BLUE_TEXTURE2D_X(textureName, samplerName, (coord2) + (offset) * renderSizeRcp)
+
+#define S_POINT_CLAMP s_point_clamp_sampler // sampler aliases used by the shared kernels; the samplers themselves are presumably declared by the included SRP headers - confirm
+#define S_LINEAR_CLAMP s_linear_clamp_sampler
+
+inline float2 decodeVelocityFromTexture(float2 ev) // doubles a stored velocity; Y is negated when UNITY_UV_STARTS_AT_TOP is set
+{
+#if UNITY_UV_STARTS_AT_TOP
+    return 2.0f * float2(ev.x, -ev.y); // presumably matches the Y-flipped screen-space convention used by the kernels - confirm
+#else
+    return 2.0f * ev;
+#endif
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta
new file mode 100644
index 0000000..82f81ea
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_hdrp.hlsl.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: ae8ec449c111471fb8eecbd6142cb9ad
+timeCreated: 1734870531
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute
deleted file mode 100644
index 70b4cae..0000000
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/sgsr2_upscale.compute
+++ /dev/null
@@ -1,3 +0,0 @@
-#pragma kernel CS
-
-#include "shaders/sgsr2_upscale.hlsl"
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl
deleted file mode 100644
index 17d12fa..0000000
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2/Shaders/shaders/sgsr2_convert.hlsl
+++ /dev/null
@@ -1,127 +0,0 @@
-//============================================================================================================
-//
-//
-// Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
-// SPDX-License-Identifier: BSD-3-Clause
-//
-//============================================================================================================
-
-float2 decodeVelocityFromTexture(float2 ev) {
- const float inv_div = 1.0f / (0.499f * 0.5f);
- float2 dv;
- dv.xy = ev.xy * inv_div - 32767.0f / 65535.0f * inv_div;
- //dv.z = uintBitsToFloat((uint(round(ev.z * 65535.0f)) << 16) | uint(round(ev.w * 65535.0f)));
- return dv;
-}
-
-Texture2D InputOpaqueColor : register(t0);
-Texture2D InputColor : register(t1);
-Texture2D InputDepth : register(t2);
-Texture2D InputVelocity : register(t3);
-RWTexture2D MotionDepthAlphaBuffer : register(u0);
-RWTexture2D YCoCgColor : register(u1);
-
-cbuffer Params : register(b0)
-{
- uint2 renderSize;
- uint2 displaySize;
- float2 ViewportSizeInverse;
- float2 displaySizeRcp;
- float2 jitterOffset;
- float4 clipToPrevClip[4];
- float preExposure;
- float cameraFovAngleHor;
- float cameraNear;
- float MinLerpContribution;
- uint bSameCamera;
- uint reset;
-};
-
-SamplerState s_PointClamp : register(s0);
-SamplerState s_LinearClamp : register(s1);
-
-[numthreads(8, 8, 1)]
-void CS(uint2 GroupId : SV_GroupID, uint2 GroupThreadId : SV_GroupThreadID)
-{
- uint2 InputPos = GroupId * uint2(8, 8) + GroupThreadId;
-
- float2 gatherCoord = float2(InputPos) * ViewportSizeInverse;
- float2 ViewportUV = gatherCoord + 0.5f * ViewportSizeInverse;
-
- //derived from ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
- //FindNearestDepth
-
- int2 InputPosBtmRight = int2(1, 1) + int2(InputPos);
- float NearestZ = InputDepth[InputPosBtmRight].x;
-
- float4 topleft = InputDepth.GatherRed(s_PointClamp, gatherCoord);
-
- NearestZ = max(topleft.x, NearestZ);
- NearestZ = max(topleft.y, NearestZ);
- NearestZ = max(topleft.z, NearestZ);
- NearestZ = max(topleft.w, NearestZ);
-
- float2 v11 = float2(ViewportSizeInverse.x, 0.0);
- float2 topRight = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v11)).yz;
-
- NearestZ = max(topRight.x, NearestZ);
- NearestZ = max(topRight.y, NearestZ);
-
- float2 v13 = float2(0.0, ViewportSizeInverse.y);
- float2 bottomLeft = InputDepth.GatherRed(s_PointClamp, (gatherCoord + v13)).xy;
-
- NearestZ = max(bottomLeft.x, NearestZ);
- NearestZ = max(bottomLeft.y, NearestZ);
-
- //refer to ue/fsr2 PostProcessFFX_FSR2ConvertVelocity.usf, and using nearest depth for dilated motion
-
- // TODO: wondering if this whole song and dance about decoding velocity is really necessary for Unity
- float2 EncodedVelocity = InputVelocity[InputPos];
-
- float2 motion;
- if (EncodedVelocity.x > 0.0)
- {
- motion = decodeVelocityFromTexture(EncodedVelocity.xy);
- }
- else
- {
-#ifdef REQUEST_NDC_Y_UP
- float2 ScreenPos = float2(2.0f * ViewportUV.x - 1.0f, 1.0f - 2.0f * ViewportUV.y);
-#else
- float2 ScreenPos = float2(2.0f * ViewportUV - 1.0f);
-#endif
- float3 Position = float3(ScreenPos, NearestZ); //this_clip
- float4 PreClip = clipToPrevClip[3] + ((clipToPrevClip[2] * Position.z) + ((clipToPrevClip[1] * ScreenPos.y) + (clipToPrevClip[0] * ScreenPos.x)));
- float2 PreScreen = PreClip.xy / PreClip.w;
- motion = Position.xy - PreScreen;
- }
-
- motion = EncodedVelocity;
-
- ////////////compute luma
- float3 Colorrgb = InputColor[InputPos].xyz;
-
- ///simple tonemap
- Colorrgb /= max(max(Colorrgb.x, Colorrgb.y), Colorrgb.z) + preExposure;
-
- float3 Colorycocg;
- Colorycocg.x = 0.25 * (Colorrgb.x + 2.0 * Colorrgb.y + Colorrgb.z);
- Colorycocg.y = clamp(0.5 * Colorrgb.x + 0.5 - 0.5 * Colorrgb.z, 0.0, 1.0);
- Colorycocg.z = clamp(Colorycocg.x + Colorycocg.y - Colorrgb.x, 0.0, 1.0);
-
- //now color YCoCG all in the range of [0,1]
- uint x11 = uint(Colorycocg.x * 2047.5);
- uint y11 = uint(Colorycocg.y * 2047.5);
- uint z10 = uint(Colorycocg.z * 1023.5);
-
- float3 Colorprergb = InputOpaqueColor[InputPos].xyz;
-
- ///simple tonemap
- Colorprergb /= max(max(Colorprergb.x, Colorprergb.y), Colorprergb.z) + preExposure;
- float3 delta = abs(Colorrgb - Colorprergb);
- float alpha_mask = max(delta.x, max(delta.y, delta.z));
- alpha_mask = (0.35f * 1000.0f) * alpha_mask;
-
- YCoCgColor[InputPos] = ((x11 << 21u) | (y11 << 10u)) | z10;
- MotionDepthAlphaBuffer[InputPos] = float4(motion, NearestZ, alpha_mask);
-}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs
index 5aa66cc..6c9ef84 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler.cs
@@ -4,20 +4,22 @@ using UnityEngine.Experimental.Rendering;
namespace UnityEngine.Rendering.PostProcessing
{
- internal class SGSR2Upscaler: Upscaler
+ internal abstract class SGSR2Upscaler: Upscaler
{
public static bool IsSupported => SystemInfo.supportsComputeShaders;
- private RenderTexture _colorLuma;
- private RenderTexture _motionDepthAlpha;
- private RenderTexture _motionDepthClipAlpha;
- private readonly RenderTexture[] _lumaHistory = new RenderTexture[2];
- private readonly RenderTexture[] _upscaleHistory = new RenderTexture[2];
+ protected abstract string VariantName { get; }
+
+ protected RenderTexture _colorLuma;
+ protected RenderTexture _motionDepthAlpha;
+ protected RenderTexture _motionDepthClipAlpha;
+ protected readonly RenderTexture[] _lumaHistory = new RenderTexture[2];
+ protected readonly RenderTexture[] _upscaleHistory = new RenderTexture[2];
- private readonly ConstantsBuffer _paramsBuffer = new();
+ protected readonly ConstantsBuffer _paramsBuffer = new();
+
+ protected uint _frameCount = 0;
- private uint _frameCount = 0;
-
public override void CreateContext(PostProcessRenderContext context, Upscaling config)
{
CreateRenderTexture(ref _colorLuma, "ColorLuma", config.MaxRenderSize, GraphicsFormat.R32_UInt, true);
@@ -41,11 +43,21 @@ namespace UnityEngine.Rendering.PostProcessing
DestroyRenderTexture(ref _motionDepthAlpha);
DestroyRenderTexture(ref _colorLuma);
}
-
+
public override void Render(PostProcessRenderContext context, Upscaling config)
{
var cmd = context.command;
- cmd.BeginSample("SGSR2");
+ cmd.BeginSample(VariantName);
+
+ Matrix4x4 clipToPrevClip = Matrix4x4.identity;
+ bool isCameraStill = false;
+ if (_frameCount > 0 && !config.Reset)
+ {
+ // We need to use the projection matrix as it is used on the GPU to match what Unity keeps in Camera.previousViewProjectionMatrix
+ Matrix4x4 viewProj = GL.GetGPUProjectionMatrix(context.camera.nonJitteredProjectionMatrix, true) * context.camera.worldToCameraMatrix;
+ clipToPrevClip = context.camera.previousViewProjectionMatrix * viewProj.inverse;
+ isCameraStill = IsCameraStill(viewProj, context.camera.previousViewProjectionMatrix);
+ }
ref var parms = ref _paramsBuffer.Value;
parms.renderSize = config.GetScaledRenderSize(context.camera);
@@ -53,12 +65,13 @@ namespace UnityEngine.Rendering.PostProcessing
parms.renderSizeRcp = new Vector2(1.0f / parms.renderSize.x, 1.0f / parms.renderSize.y);
parms.displaySizeRcp = new Vector2(1.0f / parms.displaySize.x, 1.0f / parms.displaySize.y);
parms.jitterOffset = config.JitterOffset;
- parms.clipToPrevClip = Matrix4x4.identity; // TODO: clipToPrevClip
+ parms.clipToPrevClip = clipToPrevClip;
parms.preExposure = config.preExposure;
- parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * (float)parms.renderSize.x / parms.renderSize.y;
+ parms.cameraFovAngleHor = Mathf.Tan(context.camera.fieldOfView * Mathf.Deg2Rad * 0.5f) * parms.renderSize.x * parms.renderSizeRcp.y;
parms.cameraNear = context.camera.nearClipPlane;
parms.minLerpContribution = 0f;
- parms.bSameCamera = 0u;
+ parms.scaleRatio = new Vector2(parms.displaySize.x * parms.renderSizeRcp.x, parms.displaySize.y * parms.renderSizeRcp.y);
+ parms.bSameCamera = isCameraStill ? 1u : 0u;
parms.reset = config.Reset ? 1u : 0u;
_paramsBuffer.UpdateBufferData(cmd);
@@ -73,70 +86,24 @@ namespace UnityEngine.Rendering.PostProcessing
cmd.SetRenderTarget(_upscaleHistory[1]);
cmd.ClearRenderTarget(false, true, Color.clear);
}
-
- Convert(cmd, context, config);
- Activate(cmd, context);
- Upscale(cmd, context);
- cmd.EndSample("SGSR2");
+ DoRender(cmd, context, config);
+
+ cmd.EndSample(VariantName);
_frameCount++;
}
- private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config)
- {
- var shader = context.resources.computeShaders.sgsr2Upscaler.convert;
- int kernelIndex = shader.FindKernel("CS");
-
- cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf());
- cmd.SetComputeTextureParam(shader, kernelIndex, "InputOpaqueColor", config.ColorOpaqueOnly);
- cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source);
- cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth);
- cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors);
- cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha);
- cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
-
- const int threadGroupWorkRegionDim = 8;
- int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1);
- }
+ protected abstract void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config);
- private void Activate(CommandBuffer cmd, PostProcessRenderContext context)
+ private static bool IsCameraStill(in Matrix4x4 currViewProj, in Matrix4x4 prevViewProj, float threshold = 1e-5f) // true when the summed absolute element difference of the two matrices is below threshold
{
- var shader = context.resources.computeShaders.sgsr2Upscaler.activate;
- int kernelIndex = shader.FindKernel("CS");
- uint frameIndex = _frameCount % 2;
-
- cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf());
- cmd.SetComputeTextureParam(shader, kernelIndex, "PrevLumaHistory", _lumaHistory[frameIndex ^ 1]);
- cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha);
- cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
- cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
- cmd.SetComputeTextureParam(shader, kernelIndex, "LumaHistory", _lumaHistory[frameIndex]);
-
- const int threadGroupWorkRegionDim = 8;
- int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1);
- }
+ float vpDiff = 0f; // running sum of |curr - prev| over the matrix elements
+ for (int i = 0; i < 16; i++) // visits all 16 elements through the flat Matrix4x4 indexer
+ {
+ vpDiff += Mathf.Abs(currViewProj[i] - prevViewProj[i]);
+ }
- private void Upscale(CommandBuffer cmd, PostProcessRenderContext context)
- {
- var shader = context.resources.computeShaders.sgsr2Upscaler.upscale;
- int kernelIndex = shader.FindKernel("CS");
- uint frameIndex = _frameCount % 2;
-
- cmd.SetComputeConstantBufferParam(shader, "Params", _paramsBuffer, 0, Marshal.SizeOf());
- cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]);
- cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
- cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
- cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination);
- cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]);
-
- const int threadGroupWorkRegionDim = 8;
- int dispatchDstX = (_paramsBuffer.Value.displaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
- cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1);
+ return vpDiff < threshold; // strict comparison: a total difference equal to threshold counts as moving
}
}
}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs
new file mode 100644
index 0000000..561a001
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs
@@ -0,0 +1,67 @@
+using System.Runtime.InteropServices;
+
+namespace UnityEngine.Rendering.PostProcessing
+{
+    /// <summary>
+    /// SGSR2 variant built from two compute passes: a convert pass dispatched over the render size
+    /// followed by an upscale pass dispatched over the display size.
+    /// </summary>
+    internal class SGSR2Upscaler_2PassCS: SGSR2Upscaler
+    {
+        protected override string VariantName => "SGSR2 2-Pass CS";
+
+        /// <summary>Records the convert pass and then the upscale pass into <paramref name="cmd"/>.</summary>
+        protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config)
+        {
+            Convert(cmd, context);
+            Upscale(cmd, context);
+        }
+
+        /// <summary>
+        /// Binds the source color, camera depth and motion vectors plus the two intermediate buffers to the
+        /// convert kernel, then dispatches one thread group per 8x8 region of the render size (rounded up).
+        /// </summary>
+        private void Convert(CommandBuffer cmd, PostProcessRenderContext context)
+        {
+            var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.convert;
+            int kernelIndex = shader.FindKernel("CS");
+
+            // Marshal.SizeOf has no parameterless overload, so the size is taken from the constants value itself.
+            cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
+
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1);
+        }
+
+        /// <summary>
+        /// Binds both history targets, the intermediate buffers and the destination to the upscale kernel,
+        /// then dispatches one thread group per 8x8 region of the display size (rounded up).
+        /// </summary>
+        private void Upscale(CommandBuffer cmd, PostProcessRenderContext context)
+        {
+            var shader = context.resources.computeShaders.sgsr2Upscaler.twoPassCompute.upscale;
+            int kernelIndex = shader.FindKernel("CS");
+            // History targets alternate on frame parity: [frameIndex] is bound as HistoryOutput, [frameIndex ^ 1] as PrevHistoryOutput.
+            uint frameIndex = _frameCount % 2;
+
+            cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+            cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]);
+
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchDstX = (_paramsBuffer.Value.displaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1);
+        }
+    }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta
new file mode 100644
index 0000000..a49756d
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassCS.cs.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 3d5127688822e654084c665f84c0c3e0
+timeCreated: 1734733770
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs
new file mode 100644
index 0000000..1fca768
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs
@@ -0,0 +1,57 @@
+using System.Runtime.InteropServices;
+
+namespace UnityEngine.Rendering.PostProcessing
+{
+    /// <summary>
+    /// SGSR2 variant that renders through two fullscreen passes of a fragment shader instead of compute kernels.
+    /// </summary>
+    internal class SGSR2Upscaler_2PassFS: SGSR2Upscaler
+    {
+        protected override string VariantName => "SGSR2 2-Pass Fragment";
+
+        // Property sheet wrapping the material created from the two-pass fragment shader.
+        private PropertySheet _sheet;
+        // Reused target array for the second pass: [0] is the destination, [1] the current history target.
+        private readonly RenderTargetIdentifier[] _mrt = new RenderTargetIdentifier[2];
+
+        /// <summary>Creates the shared SGSR2 render textures, then the property sheet for the fragment shader.</summary>
+        public override void CreateContext(PostProcessRenderContext context, Upscaling config)
+        {
+            base.CreateContext(context, config);
+
+            _sheet = new PropertySheet(new Material(context.resources.shaders.sgsr2Upscaler.twoPassFragment));
+        }
+
+        /// <summary>Releases the property sheet, if one was created, before the shared SGSR2 render textures.</summary>
+        public override void DestroyContext()
+        {
+            // Tolerate teardown without a prior CreateContext, and drop the reference so a released sheet is never reused.
+            _sheet?.Release();
+            _sheet = null;
+
+            base.DestroyContext();
+        }
+
+        /// <summary>
+        /// Issues two fullscreen passes: shader pass 0 into the motion/depth/clip/alpha buffer, then shader pass 1
+        /// into the destination and the current history target together.
+        /// </summary>
+        protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config)
+        {
+            // History targets alternate on frame parity: [frameIndex ^ 1] is bound as PrevOutput, [frameIndex] is rendered to.
+            uint frameIndex = _frameCount % 2;
+
+            cmd.SetGlobalTexture("InputColor", context.source);
+            _sheet.properties.SetTexture("MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
+            _sheet.properties.SetTexture("PrevOutput", _upscaleHistory[frameIndex ^ 1]);
+            // Marshal.SizeOf has no parameterless overload, so the size is taken from the constants value itself.
+            _sheet.properties.SetConstantBuffer("cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+
+            cmd.BlitFullscreenTriangle(BuiltinRenderTextureType.None, _motionDepthClipAlpha, _sheet, 0);
+
+            _mrt[0] = context.destination;
+            _mrt[1] = _upscaleHistory[frameIndex];
+            cmd.BlitFullscreenTriangle(BuiltinRenderTextureType.None, _mrt, BuiltinRenderTextureType.None, _sheet, 1);
+        }
+    }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta
new file mode 100644
index 0000000..b1bf38c
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_2PassFS.cs.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 50ebaec17d8940c0ac51a8721f9f9419
+timeCreated: 1734977118
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs
new file mode 100644
index 0000000..6be8110
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs
@@ -0,0 +1,93 @@
+using System.Runtime.InteropServices;
+
+namespace UnityEngine.Rendering.PostProcessing
+{
+    /// <summary>
+    /// SGSR2 variant built from three compute passes: convert and activate are dispatched over the render
+    /// size, upscale over the display size.
+    /// </summary>
+    internal class SGSR2Upscaler_3PassCS: SGSR2Upscaler
+    {
+        protected override string VariantName => "SGSR2 3-Pass CS";
+
+        /// <summary>Records the convert, activate and upscale passes, in that order, into <paramref name="cmd"/>.</summary>
+        protected override void DoRender(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config)
+        {
+            Convert(cmd, context, config);
+            Activate(cmd, context);
+            Upscale(cmd, context);
+        }
+
+        /// <summary>
+        /// Binds the opaque-only color, source color, camera depth and motion vectors plus two intermediate buffers
+        /// to the convert kernel, then dispatches one thread group per 8x8 region of the render size (rounded up).
+        /// </summary>
+        private void Convert(CommandBuffer cmd, PostProcessRenderContext context, Upscaling config)
+        {
+            var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.convert;
+            int kernelIndex = shader.FindKernel("CS");
+
+            // Marshal.SizeOf has no parameterless overload, so the size is taken from the constants value itself.
+            cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputOpaqueColor", config.ColorOpaqueOnly);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputColor", context.source);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputDepth", BuiltinRenderTextureType.CameraTarget, 0, RenderTextureSubElement.Depth);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "InputVelocity", BuiltinRenderTextureType.MotionVectors);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
+
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1);
+        }
+
+        /// <summary>
+        /// Binds both luma history targets and the three intermediate buffers to the activate kernel,
+        /// then dispatches one thread group per 8x8 region of the render size (rounded up).
+        /// </summary>
+        private void Activate(CommandBuffer cmd, PostProcessRenderContext context)
+        {
+            var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.activate;
+            int kernelIndex = shader.FindKernel("CS");
+            // Luma history targets alternate on frame parity: [frameIndex] is bound as LumaHistory, [frameIndex ^ 1] as PrevLumaHistory.
+            uint frameIndex = _frameCount % 2;
+
+            cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+            cmd.SetComputeTextureParam(shader, kernelIndex, "PrevLumaHistory", _lumaHistory[frameIndex ^ 1]);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthAlphaBuffer", _motionDepthAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "LumaHistory", _lumaHistory[frameIndex]);
+
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchSrcX = (_paramsBuffer.Value.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchSrcY = (_paramsBuffer.Value.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            cmd.DispatchCompute(shader, kernelIndex, dispatchSrcX, dispatchSrcY, 1);
+        }
+
+        /// <summary>
+        /// Binds both upscale history targets, the intermediate buffers and the destination to the upscale kernel,
+        /// then dispatches one thread group per 8x8 region of the display size (rounded up).
+        /// </summary>
+        private void Upscale(CommandBuffer cmd, PostProcessRenderContext context)
+        {
+            var shader = context.resources.computeShaders.sgsr2Upscaler.threePassCompute.upscale;
+            int kernelIndex = shader.FindKernel("CS");
+            // History targets alternate on frame parity: [frameIndex] is bound as HistoryOutput, [frameIndex ^ 1] as PrevHistoryOutput.
+            uint frameIndex = _frameCount % 2;
+
+            cmd.SetComputeConstantBufferParam(shader, "cbSGSR2", _paramsBuffer, 0, Marshal.SizeOf(_paramsBuffer.Value));
+            cmd.SetComputeTextureParam(shader, kernelIndex, "PrevHistoryOutput", _upscaleHistory[frameIndex ^ 1]);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "MotionDepthClipAlphaBuffer", _motionDepthClipAlpha);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "YCoCgColor", _colorLuma);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "SceneColorOutput", context.destination);
+            cmd.SetComputeTextureParam(shader, kernelIndex, "HistoryOutput", _upscaleHistory[frameIndex]);
+
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchDstX = (_paramsBuffer.Value.displaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchDstY = (_paramsBuffer.Value.displaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            cmd.DispatchCompute(shader, kernelIndex, dispatchDstX, dispatchDstY, 1);
+        }
+    }
+}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta
new file mode 100644
index 0000000..4f78d31
--- /dev/null
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/Effects/Upscaling/SGSR2Upscaler_3PassCS.cs.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 78943b31437146f29ec0a7d8d67eb5cc
+timeCreated: 1734733770
\ No newline at end of file
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs
index a51151f..0c32a88 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessLayer.cs
@@ -742,8 +742,7 @@ namespace UnityEngine.Rendering.PostProcessing
// Create a copy of the opaque-only color buffer for auto-reactive mask generation
if (context.IsSuperResolutionActive() && (upscaling.autoGenerateReactiveMask || upscaling.autoGenerateTransparencyAndComposition))
{
- Vector2Int scaledRenderSize = upscaling.GetScaledRenderSize(context.camera);
- m_opaqueOnly = context.GetScreenSpaceTemporaryRT(colorFormat: sourceFormat, widthOverride: scaledRenderSize.x, heightOverride: scaledRenderSize.y);
+ m_opaqueOnly = context.GetScreenSpaceTemporaryRT(colorFormat: sourceFormat);
m_LegacyCmdBufferOpaque.BuiltinBlit(cameraTarget, m_opaqueOnly);
}
diff --git a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs
index 7dfba97..bdb57a6 100644
--- a/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs
+++ b/Packages/com.unity.postprocessing@3.2.2/PostProcessing/Runtime/PostProcessResources.cs
@@ -1,4 +1,5 @@
using System;
+using ArmASR;
using FidelityFX.FSR2;
using FidelityFX.FSR3;
using UnityEngine.Serialization;
@@ -140,6 +141,11 @@ namespace UnityEngine.Rendering.PostProcessing
///
public Shader screenSpaceReflections;
+ /// <summary>
+ /// The shaders used by the SnapDragon Game Super Resolution 2 (SGSR2) Upscaler.
+ /// </summary>
+ public SGSR2.Shaders sgsr2Upscaler;
+
///
/// Returns a copy of this class and its content.
///
@@ -239,7 +245,7 @@ namespace UnityEngine.Rendering.PostProcessing
///
/// Compute shaders used by the SnapDragon Game Super Resolution 2 (SGSR2) Upscaler.
///
- public SGSR2.Shaders sgsr2Upscaler;
+ public SGSR2.ComputeShaders sgsr2Upscaler;
///
/// Returns a copy of this class and its content.
@@ -292,6 +298,11 @@ namespace UnityEngine.Rendering.PostProcessing
/// All the compute shaders used by post-processing.
///
public ComputeShaders computeShaders;
+
+ /// <summary>
+ /// Shaders used by the Arm Accuracy Super Resolution (ASR) Upscaler.
+ /// </summary>
+ public AsrShaderBundle asrUpscalerShaders;
#if UNITY_EDITOR
///