commit f2cb2c5116d8b1c7f437478e319bdc937110f760 Author: Nico de Poel Date: Tue Jul 16 18:17:04 2024 +0200 First import of combined FSR2/3 core assets package diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2cd4c6e --- /dev/null +++ b/.gitignore @@ -0,0 +1,49 @@ +[Ll]ibrary/ +[Tt]emp/ +[Oo]bj/ +[Bb]uild/ +[Bb]uilds/ +Assets/AssetStoreTools* + +# Visual Studio cache directory +.vs/ + +# Autogenerated VS/MD/Consulo solution and project files +ExportedObj/ +.consulo/ +*.csproj +*.unityproj +*.sln +*.suo +*.tmp +*.user +*.userprefs +*.pidb +*.booproj +*.svd +*.pdb +*.opendb + +# Unity3D generated meta files +*.pidb.meta +*.pdb.meta + +# Unity3D Generated File On Crash Reports +sysinfo.txt + +# Builds +*.apk +*.unitypackage +*.DS_Store + +Logs +UserSettings +.vscode +.multitool +.vsconfig +Assets/ActualImages/ +*ActualImages.meta +*InitTestScene*.unity* +Assets/GITT/ +Assets/GITT.meta +Recordings diff --git a/Runtime.meta b/Runtime.meta new file mode 100644 index 0000000..a57d834 --- /dev/null +++ b/Runtime.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: b64c5aa2a705139438e2bcc551a02b97 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Common.meta b/Runtime/Common.meta new file mode 100644 index 0000000..7ac2081 --- /dev/null +++ b/Runtime/Common.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: e87a0efd0d1d4644abdef16b5a28bd0f +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Common/ResourceView.cs b/Runtime/Common/ResourceView.cs new file mode 100644 index 0000000..e5946a3 --- /dev/null +++ b/Runtime/Common/ResourceView.cs @@ -0,0 +1,55 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// An immutable structure wrapping all of the necessary information to bind a specific buffer or attachment of a render target to a compute shader. + /// + public readonly struct ResourceView + { + /// + /// This value is the equivalent of not setting any value at all; all struct fields will have their default values. + /// It does not refer to a valid texture, therefore any variable set to this value should be checked for IsValid and reassigned before being bound to a shader. + /// + public static readonly ResourceView Unassigned = new ResourceView(default); + + /// + /// This value contains a valid texture reference that can be bound to a shader, however it is just an empty placeholder texture. + /// Binding this to a shader can be seen as setting the texture variable inside the shader to null. + /// + public static readonly ResourceView None = new ResourceView(BuiltinRenderTextureType.None); + + public ResourceView(in RenderTargetIdentifier renderTarget, RenderTextureSubElement subElement = RenderTextureSubElement.Default, int mipLevel = 0) + { + RenderTarget = renderTarget; + SubElement = subElement; + MipLevel = mipLevel; + } + + public bool IsValid => !RenderTarget.Equals(default); + + public readonly RenderTargetIdentifier RenderTarget; + public readonly RenderTextureSubElement SubElement; + public readonly int MipLevel; + } +} diff --git a/Runtime/Common/ResourceView.cs.meta b/Runtime/Common/ResourceView.cs.meta new file mode 100644 index 0000000..901157b --- /dev/null +++ b/Runtime/Common/ResourceView.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: eb9fdfac33a070740b66520d88f43ab7 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Common/fidelityfx.fsr.common.asmdef b/Runtime/Common/fidelityfx.fsr.common.asmdef new file mode 100644 index 0000000..18443d2 --- /dev/null +++ b/Runtime/Common/fidelityfx.fsr.common.asmdef @@ -0,0 +1,14 @@ +{ + "name": "fidelityfx.fsr.common", + "rootNamespace": "FidelityFX", + "references": [], + "includePlatforms": [], + "excludePlatforms": [], + "allowUnsafeCode": false, + "overrideReferences": false, + "precompiledReferences": [], + "autoReferenced": true, + "defineConstraints": [], + "versionDefines": [], + "noEngineReferences": false +} \ No newline at end of file diff --git a/Runtime/Common/fidelityfx.fsr.common.asmdef.meta b/Runtime/Common/fidelityfx.fsr.common.asmdef.meta new file mode 100644 index 0000000..867a421 --- /dev/null +++ b/Runtime/Common/fidelityfx.fsr.common.asmdef.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: f355c2e9d97d4d8438e24fa268b0f6a5 +AssemblyDefinitionImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR2.meta b/Runtime/FSR2.meta new file mode 100644 index 0000000..696ee85 --- /dev/null +++ b/Runtime/FSR2.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: cce3e495c6ece2145b041d0a6e43bb26 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR2/Fsr2.cs b/Runtime/FSR2/Fsr2.cs new file mode 100644 index 0000000..4259b6e --- /dev/null +++ b/Runtime/FSR2/Fsr2.cs @@ -0,0 +1,301 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// A collection of helper functions and data structures required by the FSR2 process. + /// + public static class Fsr2 + { + /// + /// Creates a new FSR2 context with standard parameters that are appropriate for the current platform. + /// + public static Fsr2Context CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, Fsr2Shaders shaders, InitializationFlags flags = 0) + { + if (SystemInfo.usesReversedZBuffer) + flags |= InitializationFlags.EnableDepthInverted; + else + flags &= ~InitializationFlags.EnableDepthInverted; + +#if UNITY_EDITOR || DEVELOPMENT_BUILD + flags |= InitializationFlags.EnableDebugChecking; +#endif + + Debug.Log($"Setting up FSR2 with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); + + var contextDescription = new ContextDescription + { + Flags = flags, + DisplaySize = displaySize, + MaxRenderSize = maxRenderSize, + Shaders = shaders, + }; + + var context = new Fsr2Context(); + context.Create(contextDescription); + return context; + } + + public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode) + { + switch (qualityMode) + { + case QualityMode.NativeAA: + return 1.0f; + case QualityMode.UltraQuality: + return 1.2f; + case QualityMode.Quality: + return 1.5f; + case QualityMode.Balanced: + return 1.7f; + case QualityMode.Performance: + return 2.0f; + case QualityMode.UltraPerformance: + return 3.0f; + default: + return 1.0f; + } + } + + public static void GetRenderResolutionFromQualityMode( + out int renderWidth, out int renderHeight, + int displayWidth, int displayHeight, QualityMode qualityMode) + { + float ratio = GetUpscaleRatioFromQualityMode(qualityMode); + renderWidth = Mathf.RoundToInt(displayWidth / ratio); + renderHeight = Mathf.RoundToInt(displayHeight / ratio); + } + + public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) + { + return Mathf.Log((float)renderWidth / displayWidth, 2.0f) - 1.0f; + } + + public static int GetJitterPhaseCount(int renderWidth, int displayWidth) + { + const float basePhaseCount = 8.0f; + int jitterPhaseCount = (int)(basePhaseCount * Mathf.Pow((float)displayWidth / renderWidth, 2.0f)); + return jitterPhaseCount; + } + + public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount) + { + outX = Halton((index % phaseCount) + 1, 2) - 0.5f; + outY = Halton((index % phaseCount) + 1, 3) - 0.5f; + } + + // Calculate halton number for index and base. + private static float Halton(int index, int @base) + { + float f = 1.0f, result = 0.0f; + + for (int currentIndex = index; currentIndex > 0;) { + + f /= @base; + result += f * (currentIndex % @base); + currentIndex = (int)Mathf.Floor((float)currentIndex / @base); + } + + return result; + } + + public static float Lanczos2(float value) + { + return Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value)); + } + +#if !UNITY_2021_1_OR_NEWER + internal static void SetBufferData(this CommandBuffer commandBuffer, ComputeBuffer computeBuffer, Array data) + { + commandBuffer.SetComputeBufferData(computeBuffer, data); + } +#endif + + public enum QualityMode + { + NativeAA = 0, + UltraQuality = 1, + Quality = 2, + Balanced = 3, + Performance = 4, + UltraPerformance = 5, + } + + [Flags] + public enum InitializationFlags + { + EnableHighDynamicRange = 1 << 0, + EnableDisplayResolutionMotionVectors = 1 << 1, + EnableMotionVectorsJitterCancellation = 1 << 2, + EnableDepthInverted = 1 << 3, + EnableDepthInfinite = 1 << 4, + EnableAutoExposure = 1 << 5, + EnableDynamicResolution = 1 << 6, + EnableFP16Usage = 1 << 7, + EnableDebugChecking = 1 << 8, + } + + /// + /// A structure encapsulating the parameters required to initialize FidelityFX Super Resolution 2 upscaling. + /// + public struct ContextDescription + { + public InitializationFlags Flags; + public Vector2Int MaxRenderSize; + public Vector2Int DisplaySize; + public Fsr2Shaders Shaders; + } + + /// + /// A structure encapsulating the parameters for dispatching the various passes of FidelityFX Super Resolution 2. + /// + public class DispatchDescription + { + public ResourceView Color; + public ResourceView Depth; + public ResourceView MotionVectors; + public ResourceView Exposure; // optional + public ResourceView Reactive; // optional + public ResourceView TransparencyAndComposition; // optional + public ResourceView Output; + public Vector2 JitterOffset; + public Vector2 MotionVectorScale; + public Vector2Int RenderSize; + public Vector2Int InputResourceSize; + public bool EnableSharpening; + public float Sharpness; + public float FrameTimeDelta; // in seconds + public float PreExposure; + public bool Reset; + public float CameraNear; + public float CameraFar; + public float CameraFovAngleVertical; + public float ViewSpaceToMetersFactor; + + // EXPERIMENTAL reactive mask generation parameters + public bool EnableAutoReactive; + public ResourceView ColorOpaqueOnly; + public float AutoTcThreshold = 0.05f; + public float AutoTcScale = 1.0f; + public float AutoReactiveScale = 5.0f; + public float AutoReactiveMax = 0.9f; + } + + /// + /// A structure encapsulating the parameters for automatic generation of a reactive mask. + /// The default values for Scale, CutoffThreshold, BinaryValue and Flags were taken from the FSR2 demo project. + /// + public class GenerateReactiveDescription + { + public ResourceView ColorOpaqueOnly; + public ResourceView ColorPreUpscale; + public ResourceView OutReactive; + public Vector2Int RenderSize; + public float Scale = 0.5f; + public float CutoffThreshold = 0.2f; + public float BinaryValue = 0.9f; + public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax; + } + + [Flags] + public enum GenerateReactiveFlags + { + ApplyTonemap = 1 << 0, + ApplyInverseTonemap = 1 << 1, + ApplyThreshold = 1 << 2, + UseComponentsMax = 1 << 3, + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct UpscalerConstants + { + public Vector2Int renderSize; + public Vector2Int maxRenderSize; + public Vector2Int displaySize; + public Vector2Int inputColorResourceDimensions; + public Vector2Int lumaMipDimensions; + public int lumaMipLevelToUse; + public int frameIndex; + + public Vector4 deviceToViewDepth; + public Vector2 jitterOffset; + public Vector2 motionVectorScale; + public Vector2 downscaleFactor; + public Vector2 motionVectorJitterCancellation; + public float preExposure; + public float previousFramePreExposure; + public float tanHalfFOV; + public float jitterPhaseCount; + public float deltaTime; + public float dynamicResChangeFactor; + public float viewSpaceToMetersFactor; + public float padding; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct SpdConstants + { + public uint mips; + public uint numWorkGroups; + public uint workGroupOffsetX, workGroupOffsetY; + public uint renderSizeX, renderSizeY; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants + { + public float scale; + public float threshold; + public float binaryValue; + public uint flags; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants2 + { + public float autoTcThreshold; + public float autoTcScale; + public float autoReactiveScale; + public float autoReactiveMax; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct RcasConstants + { + public RcasConstants(uint sharpness, uint halfSharp) + { + this.sharpness = sharpness; + this.halfSharp = halfSharp; + dummy0 = dummy1 = 0; + } + + public readonly uint sharpness; + public readonly uint halfSharp; + public readonly uint dummy0; + public readonly uint dummy1; + } + } +} diff --git a/Runtime/FSR2/Fsr2.cs.meta b/Runtime/FSR2/Fsr2.cs.meta new file mode 100644 index 0000000..10fe120 --- /dev/null +++ b/Runtime/FSR2/Fsr2.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 742d52dc87714f0d93f3b59719859dff +timeCreated: 1673441954 \ No newline at end of file diff --git a/Runtime/FSR2/Fsr2Assets.cs b/Runtime/FSR2/Fsr2Assets.cs new file mode 100644 index 0000000..840d16b --- /dev/null +++ b/Runtime/FSR2/Fsr2Assets.cs @@ -0,0 +1,152 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace FidelityFX +{ + /// + /// Scriptable object containing all shader resources required by FidelityFX Super Resolution 2 (FSR2). + /// These can be stored in an asset file and referenced from a scene or prefab, avoiding the need to load the shaders from a Resources folder. + /// + [CreateAssetMenu(fileName = "FSR2 Assets", menuName = "FidelityFX/FSR2 Assets", order = 1102)] + public class Fsr2Assets : ScriptableObject + { + public Fsr2Shaders shaders; + +#if UNITY_EDITOR + private void Reset() + { + shaders = new Fsr2Shaders + { + computeLuminancePyramidPass = FindComputeShader("ffx_fsr2_compute_luminance_pyramid_pass"), + reconstructPreviousDepthPass = FindComputeShader("ffx_fsr2_reconstruct_previous_depth_pass"), + depthClipPass = FindComputeShader("ffx_fsr2_depth_clip_pass"), + lockPass = FindComputeShader("ffx_fsr2_lock_pass"), + accumulatePass = FindComputeShader("ffx_fsr2_accumulate_pass"), + sharpenPass = FindComputeShader("ffx_fsr2_rcas_pass"), + autoGenReactivePass = FindComputeShader("ffx_fsr2_autogen_reactive_pass"), + tcrAutoGenPass = FindComputeShader("ffx_fsr2_tcr_autogen_pass"), + }; + } + + private static ComputeShader FindComputeShader(string name) + { + string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:ComputeShader {name}"); + if (assetGuids == null || assetGuids.Length == 0) + return null; + + string assetPath = UnityEditor.AssetDatabase.GUIDToAssetPath(assetGuids[0]); + return UnityEditor.AssetDatabase.LoadAssetAtPath(assetPath); + } +#endif + } + + /// + /// All the compute shaders used by FSR2. + /// + [System.Serializable] + public class Fsr2Shaders + { + /// + /// The compute shader used by the luminance pyramid computation pass. + /// + public ComputeShader computeLuminancePyramidPass; + + /// + /// The compute shader used by the previous depth reconstruction pass. + /// + public ComputeShader reconstructPreviousDepthPass; + + /// + /// The compute shader used by the depth clip pass. + /// + public ComputeShader depthClipPass; + + /// + /// The compute shader used by the lock pass. + /// + public ComputeShader lockPass; + + /// + /// The compute shader used by the accumulation pass. + /// + public ComputeShader accumulatePass; + + /// + /// The compute shader used by the RCAS sharpening pass. + /// + public ComputeShader sharpenPass; + + /// + /// The compute shader used to auto-generate a reactive mask. + /// + public ComputeShader autoGenReactivePass; + + /// + /// The compute shader used to auto-generate a transparency & composition mask. + /// + public ComputeShader tcrAutoGenPass; + + /// + /// Returns a copy of this class and its contents. + /// + public Fsr2Shaders Clone() + { + return (Fsr2Shaders)MemberwiseClone(); + } + + /// + /// Returns a copy of this class with clones of all its shaders. + /// This can be useful if you're running multiple FSR2 instances with different shader configurations. + /// Be sure to clean up these clones through Dispose once you're done with them. + /// + public Fsr2Shaders DeepCopy() + { + return new Fsr2Shaders + { + computeLuminancePyramidPass = Object.Instantiate(computeLuminancePyramidPass), + reconstructPreviousDepthPass = Object.Instantiate(reconstructPreviousDepthPass), + depthClipPass = Object.Instantiate(depthClipPass), + lockPass = Object.Instantiate(lockPass), + accumulatePass = Object.Instantiate(accumulatePass), + sharpenPass = Object.Instantiate(sharpenPass), + autoGenReactivePass = Object.Instantiate(autoGenReactivePass), + tcrAutoGenPass = Object.Instantiate(tcrAutoGenPass), + }; + } + + /// + /// Destroy all the shaders within this instance. + /// Use this only on clones created through DeepCopy. + /// + public void Dispose() + { + Object.Destroy(computeLuminancePyramidPass); + Object.Destroy(reconstructPreviousDepthPass); + Object.Destroy(depthClipPass); + Object.Destroy(lockPass); + Object.Destroy(accumulatePass); + Object.Destroy(sharpenPass); + Object.Destroy(autoGenReactivePass); + Object.Destroy(tcrAutoGenPass); + } + } +} diff --git a/Runtime/FSR2/Fsr2Assets.cs.meta b/Runtime/FSR2/Fsr2Assets.cs.meta new file mode 100644 index 0000000..3c446ad --- /dev/null +++ b/Runtime/FSR2/Fsr2Assets.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: db26e15a33db6ab42a38daab0ba2712f +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR2/Fsr2Callbacks.cs b/Runtime/FSR2/Fsr2Callbacks.cs new file mode 100644 index 0000000..c4ea19e --- /dev/null +++ b/Runtime/FSR2/Fsr2Callbacks.cs @@ -0,0 +1,81 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace FidelityFX +{ + /// + /// A collection of callbacks required by the FSR2 process. + /// This allows some customization by the game dev on how to integrate FSR2 upscaling into their own game setup. + /// + public interface IFsr2Callbacks + { + /// + /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers. + /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures. + /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays. + /// + /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. + /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand. + /// + void ApplyMipmapBias(float biasOffset); + + void UndoMipmapBias(); + } + + /// + /// Default implementation of IFsr2Callbacks. + /// These are fine for testing but a proper game will want to extend and override these methods. + /// + public class Fsr2CallbacksBase: IFsr2Callbacks + { + protected float CurrentBiasOffset = 0; + + public virtual void ApplyMipmapBias(float biasOffset) + { + if (float.IsNaN(biasOffset) || float.IsInfinity(biasOffset)) + return; + + CurrentBiasOffset += biasOffset; + + if (Mathf.Approximately(CurrentBiasOffset, 0f)) + { + CurrentBiasOffset = 0f; + } + + foreach (var texture in Resources.FindObjectsOfTypeAll()) + { + if (texture.mipmapCount <= 1) + continue; + + texture.mipMapBias += biasOffset; + } + } + + public virtual void UndoMipmapBias() + { + if (CurrentBiasOffset == 0f) + return; + + ApplyMipmapBias(-CurrentBiasOffset); + } + } +} diff --git a/Runtime/FSR2/Fsr2Callbacks.cs.meta b/Runtime/FSR2/Fsr2Callbacks.cs.meta new file mode 100644 index 0000000..abc4d96 --- /dev/null +++ b/Runtime/FSR2/Fsr2Callbacks.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 5906deeb6ec2854449bf33db2e71a046 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR2/Fsr2Context.cs b/Runtime/FSR2/Fsr2Context.cs new file mode 100644 index 0000000..f85c47b --- /dev/null +++ b/Runtime/FSR2/Fsr2Context.cs @@ -0,0 +1,613 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// This class loosely matches the FfxFsr2Context struct from the original FSR2 codebase. + /// It manages the various resources and compute passes required by the FSR2 process. + /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers. + /// This should make it suitable for integration with any of the available Unity render pipelines. + /// + public class Fsr2Context + { + private const int MaxQueuedFrames = 16; + + private Fsr2.ContextDescription _contextDescription; + private CommandBuffer _commandBuffer; + + private Fsr2Pass _computeLuminancePyramidPass; + private Fsr2Pass _reconstructPreviousDepthPass; + private Fsr2Pass _depthClipPass; + private Fsr2Pass _lockPass; + private Fsr2Pass _accumulatePass; + private Fsr2Pass _sharpenPass; + private Fsr2Pass _generateReactivePass; + private Fsr2Pass _tcrAutogeneratePass; + + private readonly Fsr2Resources _resources = new Fsr2Resources(); + + private ComputeBuffer _upscalerConstantsBuffer; + private readonly Fsr2.UpscalerConstants[] _upscalerConstantsArray = { new Fsr2.UpscalerConstants() }; + private ref Fsr2.UpscalerConstants UpscalerConsts => ref _upscalerConstantsArray[0]; + + private ComputeBuffer _spdConstantsBuffer; + private readonly Fsr2.SpdConstants[] _spdConstantsArray = { new Fsr2.SpdConstants() }; + private ref Fsr2.SpdConstants SpdConsts => ref _spdConstantsArray[0]; + + private ComputeBuffer _rcasConstantsBuffer; + private readonly Fsr2.RcasConstants[] _rcasConstantsArray = new Fsr2.RcasConstants[1]; + private ref Fsr2.RcasConstants RcasConsts => ref _rcasConstantsArray[0]; + + private ComputeBuffer _generateReactiveConstantsBuffer; + private readonly Fsr2.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Fsr2.GenerateReactiveConstants() }; + private ref Fsr2.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0]; + + private ComputeBuffer _tcrAutogenerateConstantsBuffer; + private readonly Fsr2.GenerateReactiveConstants2[] _tcrAutogenerateConstantsArray = { new Fsr2.GenerateReactiveConstants2() }; + private ref Fsr2.GenerateReactiveConstants2 TcrAutoGenConsts => ref _tcrAutogenerateConstantsArray[0]; + + private bool _firstExecution; + private Vector2 _previousJitterOffset; + private int _resourceFrameIndex; + + public void Create(Fsr2.ContextDescription contextDescription) + { + _contextDescription = contextDescription; + _commandBuffer = new CommandBuffer { name = "FSR2" }; + + _upscalerConstantsBuffer = CreateConstantBuffer(); + _spdConstantsBuffer = CreateConstantBuffer(); + _rcasConstantsBuffer = CreateConstantBuffer(); + _generateReactiveConstantsBuffer = CreateConstantBuffer(); + _tcrAutogenerateConstantsBuffer = CreateConstantBuffer(); + + // Set defaults + _firstExecution = true; + _resourceFrameIndex = 0; + + UpscalerConsts.displaySize = _contextDescription.DisplaySize; + + _resources.Create(_contextDescription); + CreatePasses(); + } + + private void CreatePasses() + { + _computeLuminancePyramidPass = new Fsr2ComputeLuminancePyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer); + _reconstructPreviousDepthPass = new Fsr2ReconstructPreviousDepthPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _depthClipPass = new Fsr2DepthClipPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _lockPass = new Fsr2LockPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _accumulatePass = new Fsr2AccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer); + _sharpenPass = new Fsr2SharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer); + _generateReactivePass = new Fsr2GenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer); + _tcrAutogeneratePass = new Fsr2TcrAutogeneratePass(_contextDescription, _resources, _upscalerConstantsBuffer, _tcrAutogenerateConstantsBuffer); + } + + public void Destroy() + { + DestroyPass(ref _tcrAutogeneratePass); + DestroyPass(ref _generateReactivePass); + DestroyPass(ref _sharpenPass); + DestroyPass(ref _accumulatePass); + DestroyPass(ref _lockPass); + DestroyPass(ref _depthClipPass); + DestroyPass(ref _reconstructPreviousDepthPass); + DestroyPass(ref _computeLuminancePyramidPass); + + _resources.Destroy(); + + DestroyConstantBuffer(ref _tcrAutogenerateConstantsBuffer); + DestroyConstantBuffer(ref _generateReactiveConstantsBuffer); + DestroyConstantBuffer(ref _rcasConstantsBuffer); + DestroyConstantBuffer(ref _spdConstantsBuffer); + DestroyConstantBuffer(ref _upscalerConstantsBuffer); + + if (_commandBuffer != null) + { + _commandBuffer.Dispose(); + _commandBuffer = null; + } + } + + public void Dispatch(Fsr2.DispatchDescription dispatchParams) + { + _commandBuffer.Clear(); + Dispatch(dispatchParams, _commandBuffer); + Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void Dispatch(Fsr2.DispatchDescription dispatchParams, CommandBuffer commandBuffer) + { + if ((_contextDescription.Flags & Fsr2.InitializationFlags.EnableDebugChecking) != 0) + { + DebugCheckDispatch(dispatchParams); + } + + if (_firstExecution) + { + commandBuffer.SetRenderTarget(_resources.LockStatus[0]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(_resources.LockStatus[1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + int frameIndex = _resourceFrameIndex % 2; + bool resetAccumulation = dispatchParams.Reset || _firstExecution; + _firstExecution = false; + + // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends + if ((_contextDescription.Flags & Fsr2.InitializationFlags.EnableAutoExposure) != 0) + dispatchParams.Exposure = new ResourceView(_resources.AutoExposure); + else if (!dispatchParams.Exposure.IsValid) + dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure); + + if (dispatchParams.EnableAutoReactive) + { + // Create the auto-TCR resources only when we need them + if (_resources.AutoReactive == null) + _resources.CreateTcrAutogenResources(_contextDescription); + + if (resetAccumulation) + { + RenderTargetIdentifier opaqueOnly = dispatchParams.ColorOpaqueOnly.IsValid ? dispatchParams.ColorOpaqueOnly.RenderTarget : Fsr2ShaderIDs.SrvOpaqueOnly; + commandBuffer.Blit(_resources.PrevPreAlpha[frameIndex ^ 1], opaqueOnly); + } + } + else if (_resources.AutoReactive != null) + { + // Destroy the auto-TCR resources if we don't use the feature + _resources.DestroyTcrAutogenResources(); + } + + if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive); + if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive); + Fsr2Resources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); + + SetupConstants(dispatchParams, resetAccumulation); + + // Reactive mask bias + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstX = (_contextDescription.DisplaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstY = (_contextDescription.DisplaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + // Clear reconstructed depth for max depth store + if (resetAccumulation) + { + commandBuffer.SetRenderTarget(_resources.LockStatus[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + commandBuffer.SetRenderTarget(_resources.SceneLuminance); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + // Auto exposure always used to track luma changes in locking logic + commandBuffer.SetRenderTarget(_resources.AutoExposure); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + + // Reset atomic counter to 0 + commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option + bool depthInverted = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDepthInverted) == Fsr2.InitializationFlags.EnableDepthInverted; + commandBuffer.SetRenderTarget(Fsr2ShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white); + + // Auto exposure + SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount); + + // Initialize constant buffers data + commandBuffer.SetBufferData(_upscalerConstantsBuffer, _upscalerConstantsArray); + commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray); + + // Auto reactive + if (dispatchParams.EnableAutoReactive) + { + GenerateTransparencyCompositionReactive(dispatchParams, commandBuffer, frameIndex); + dispatchParams.Reactive = new ResourceView(_resources.AutoReactive); + dispatchParams.TransparencyAndComposition = new ResourceView(_resources.AutoComposition); + } + + // Compute luminance pyramid + _computeLuminancePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y); + + // Reconstruct previous depth + _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + + // Depth clip + _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + + // Create locks + _lockPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + + // Accumulate + _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); + + if (dispatchParams.EnableSharpening) + { + // Compute the constants + SetupRcasConstants(dispatchParams); + commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray); + + // Dispatch RCAS + const int threadGroupWorkRegionDimRcas = 16; + int threadGroupsX = (Screen.width + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + int threadGroupsY = (Screen.height + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY); + } + + _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames; + + Fsr2Resources.DestroyAliasableResources(commandBuffer); + } + + public void GenerateReactiveMask(Fsr2.GenerateReactiveDescription dispatchParams) + { + _commandBuffer.Clear(); + GenerateReactiveMask(dispatchParams, _commandBuffer); + Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void GenerateReactiveMask(Fsr2.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) + { + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + GenReactiveConsts.scale = dispatchParams.Scale; + GenReactiveConsts.threshold = dispatchParams.CutoffThreshold; + GenReactiveConsts.binaryValue = dispatchParams.BinaryValue; + GenReactiveConsts.flags = (uint)dispatchParams.Flags; + commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray); + + ((Fsr2GenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams, dispatchSrcX, dispatchSrcY); + } + + private void GenerateTransparencyCompositionReactive(Fsr2.DispatchDescription dispatchParams, CommandBuffer commandBuffer, int frameIndex) + { + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + TcrAutoGenConsts.autoTcThreshold = dispatchParams.AutoTcThreshold; + TcrAutoGenConsts.autoTcScale = dispatchParams.AutoTcScale; + TcrAutoGenConsts.autoReactiveScale = dispatchParams.AutoReactiveScale; + TcrAutoGenConsts.autoReactiveMax = dispatchParams.AutoReactiveMax; + commandBuffer.SetBufferData(_tcrAutogenerateConstantsBuffer, _tcrAutogenerateConstantsArray); + + _tcrAutogeneratePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + } + + private void SetupConstants(Fsr2.DispatchDescription dispatchParams, bool resetAccumulation) + { + ref Fsr2.UpscalerConstants constants = ref UpscalerConsts; + + constants.jitterOffset = dispatchParams.JitterOffset; + constants.renderSize = dispatchParams.RenderSize; + constants.maxRenderSize = _contextDescription.MaxRenderSize; + constants.inputColorResourceDimensions = dispatchParams.InputResourceSize; + + // Compute the horizontal FOV for the shader from the vertical one + float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; + float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f; + constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f); + constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? dispatchParams.ViewSpaceToMetersFactor : 1.0f; + + // Compute params to enable device depth to view space depth computation in shader + constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams); + + // To be updated if resource is larger than the actual image size + constants.downscaleFactor = new Vector2((float)constants.renderSize.x / _contextDescription.DisplaySize.x, (float)constants.renderSize.y / _contextDescription.DisplaySize.y); + constants.previousFramePreExposure = constants.preExposure; + constants.preExposure = (dispatchParams.PreExposure != 0) ? dispatchParams.PreExposure : 1.0f; + + // Motion vector data + Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.displaySize : constants.renderSize; + constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize; + + // Compute jitter cancellation + if ((_contextDescription.Flags & Fsr2.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) + { + constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize; + _previousJitterOffset = constants.jitterOffset; + } + + int jitterPhaseCount = Fsr2.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.DisplaySize.x); + if (resetAccumulation || constants.jitterPhaseCount == 0) + { + constants.jitterPhaseCount = jitterPhaseCount; + } + else + { + int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount); + if (jitterPhaseCountDelta > 0) + constants.jitterPhaseCount++; + else if (jitterPhaseCountDelta < 0) + constants.jitterPhaseCount--; + } + + // Convert delta time to seconds and clamp to [0, 1] + constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta); + + if (resetAccumulation) + constants.frameIndex = 0; + else + constants.frameIndex++; + + // Shading change usage of the SPD mip levels + constants.lumaMipLevelToUse = Fsr2Pass.ShadingChangeMipLevel; + + float mipDiv = 2 << constants.lumaMipLevelToUse; + constants.lumaMipDimensions.x = (int)(constants.maxRenderSize.x / mipDiv); + constants.lumaMipDimensions.y = (int)(constants.maxRenderSize.y / mipDiv); + } + + private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Fsr2.DispatchDescription dispatchParams) + { + bool inverted = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDepthInverted) != 0; + bool infinite = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDepthInfinite) != 0; + + // make sure it has no impact if near and far plane values are swapped in dispatch params + // the flags "inverted" and "infinite" will decide what transform to use + float min = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar); + float max = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar); + + if (inverted) + { + (min, max) = (max, min); + } + + float q = max / (min - max); + float d = -1.0f; + + Vector4 matrixElemC = new Vector4(q, -1.0f - Mathf.Epsilon, q, 0.0f + Mathf.Epsilon); + Vector4 matrixElemE = new Vector4(q * min, -min - Mathf.Epsilon, q * min, max); + + // Revert x and y coords + float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; + float cotHalfFovY = Mathf.Cos(0.5f * dispatchParams.CameraFovAngleVertical) / Mathf.Sin(0.5f * dispatchParams.CameraFovAngleVertical); + + int matrixIndex = (inverted ? 2 : 0) + (infinite ? 1 : 0); + return new Vector4( + d * matrixElemC[matrixIndex], + matrixElemE[matrixIndex], + aspect / cotHalfFovY, + 1.0f / cotHalfFovY); + } + + private void SetupRcasConstants(Fsr2.DispatchDescription dispatchParams) + { + int sharpnessIndex = Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1)); + RcasConsts = RcasConfigs[sharpnessIndex]; + } + + private void SetupSpdConstants(Fsr2.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount) + { + RectInt rectInfo = new RectInt(0, 0, dispatchParams.RenderSize.x, dispatchParams.RenderSize.y); + SpdSetup(rectInfo, out dispatchThreadGroupCount, out var workGroupOffset, out var numWorkGroupsAndMips); + + // Downsample + ref Fsr2.SpdConstants spdConstants = ref SpdConsts; + spdConstants.numWorkGroups = (uint)numWorkGroupsAndMips.x; + spdConstants.mips = (uint)numWorkGroupsAndMips.y; + spdConstants.workGroupOffsetX = (uint)workGroupOffset.x; + spdConstants.workGroupOffsetY = (uint)workGroupOffset.y; + spdConstants.renderSizeX = (uint)dispatchParams.RenderSize.x; + spdConstants.renderSizeY = (uint)dispatchParams.RenderSize.y; + } + + private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1) + { + workGroupOffset = new Vector2Int(rectInfo.x / 64, rectInfo.y / 64); + + int endIndexX = (rectInfo.x + rectInfo.width - 1) / 64; + int endIndexY = (rectInfo.y + rectInfo.height - 1) / 64; + + dispatchThreadGroupCount = new Vector2Int(endIndexX + 1 - workGroupOffset.x, endIndexY + 1 - workGroupOffset.y); + + numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mips); + if (mips < 0) + { + float resolution = Math.Max(rectInfo.width, rectInfo.height); + numWorkGroupsAndMips.y = Math.Min(Mathf.FloorToInt(Mathf.Log(resolution, 2.0f)), 12); + } + } + + private void DebugCheckDispatch(Fsr2.DispatchDescription dispatchParams) + { + if (!dispatchParams.Color.IsValid) + { + Debug.LogError("Color resource is null"); + } + + if (!dispatchParams.Depth.IsValid) + { + Debug.LogError("Depth resource is null"); + } + + if (!dispatchParams.MotionVectors.IsValid) + { + Debug.LogError("MotionVectors resource is null"); + } + + if (!dispatchParams.Output.IsValid) + { + Debug.LogError("Output resource is null"); + } + + if (dispatchParams.Exposure.IsValid && (_contextDescription.Flags & Fsr2.InitializationFlags.EnableAutoExposure) != 0) + { + Debug.LogWarning("Exposure resource provided, however auto exposure flag is present"); + } + + if (Mathf.Abs(dispatchParams.JitterOffset.x) > 1.0f || Mathf.Abs(dispatchParams.JitterOffset.y) > 1.0f) + { + Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]"); + } + + if (dispatchParams.MotionVectorScale.x > _contextDescription.MaxRenderSize.x || dispatchParams.MotionVectorScale.y > _contextDescription.MaxRenderSize.y) + { + Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize"); + } + + if (dispatchParams.MotionVectorScale.x == 0.0f || dispatchParams.MotionVectorScale.y == 0.0f) + { + Debug.LogWarning("MotionVectorScale contains zero scale value"); + } + + if (dispatchParams.RenderSize.x > _contextDescription.MaxRenderSize.x || dispatchParams.RenderSize.y > _contextDescription.MaxRenderSize.y) + { + Debug.LogWarning("RenderSize is greater than context MaxRenderSize"); + } + + if (dispatchParams.RenderSize.x == 0 || dispatchParams.RenderSize.y == 0) + { + Debug.LogWarning("RenderSize contains zero dimension"); + } + + if (dispatchParams.FrameTimeDelta > 1.0f) + { + Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)"); + } + + if (dispatchParams.PreExposure == 0.0f) + { + Debug.LogError("PreExposure provided as 0.0f which is invalid"); + } + + bool infiniteDepth = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDepthInfinite) != 0; + bool inverseDepth = (_contextDescription.Flags & Fsr2.InitializationFlags.EnableDepthInverted) != 0; + + if (inverseDepth) + { + if (dispatchParams.CameraNear < dispatchParams.CameraFar) + { + Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar"); + } + + if (infiniteDepth) + { + if (dispatchParams.CameraNear < float.MaxValue) + { + Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue"); + } + } + + if (dispatchParams.CameraFar < 0.075f) + { + Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting"); + } + } + else + { + if (dispatchParams.CameraNear > dispatchParams.CameraFar) + { + Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context"); + } + + if (infiniteDepth) + { + if (dispatchParams.CameraFar < float.MaxValue) + { + Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue"); + } + } + + if (dispatchParams.CameraNear < 0.075f) + { + Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting"); + } + } + + if (dispatchParams.CameraFovAngleVertical <= 0.0f) + { + Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f"); + } + + if (dispatchParams.CameraFovAngleVertical > Mathf.PI) + { + Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI"); + } + } + + /// + /// The FSR2 C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader. + /// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values. + /// + private static readonly Fsr2.RcasConstants[] RcasConfigs = new [] + { + new Fsr2.RcasConstants(1048576000u, 872428544u), + new Fsr2.RcasConstants(1049178080u, 877212745u), + new Fsr2.RcasConstants(1049823372u, 882390168u), + new Fsr2.RcasConstants(1050514979u, 887895276u), + new Fsr2.RcasConstants(1051256227u, 893859143u), + new Fsr2.RcasConstants(1052050675u, 900216232u), + new Fsr2.RcasConstants(1052902144u, 907032080u), + new Fsr2.RcasConstants(1053814727u, 914306687u), + new Fsr2.RcasConstants(1054792807u, 922105590u), + new Fsr2.RcasConstants(1055841087u, 930494326u), + new Fsr2.RcasConstants(1056964608u, 939538432u), + new Fsr2.RcasConstants(1057566688u, 944322633u), + new Fsr2.RcasConstants(1058211980u, 949500056u), + new Fsr2.RcasConstants(1058903587u, 955005164u), + new Fsr2.RcasConstants(1059644835u, 960969031u), + new Fsr2.RcasConstants(1060439283u, 967326120u), + new Fsr2.RcasConstants(1061290752u, 974141968u), + new Fsr2.RcasConstants(1062203335u, 981416575u), + new Fsr2.RcasConstants(1063181415u, 989215478u), + new Fsr2.RcasConstants(1064229695u, 997604214u), + new Fsr2.RcasConstants(1065353216u, 1006648320), + }; + + private static ComputeBuffer CreateConstantBuffer() where TConstants: struct + { + return new ComputeBuffer(1, Marshal.SizeOf(), ComputeBufferType.Constant); + } + + private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef) + { + if (bufferRef == null) + return; + + bufferRef.Release(); + bufferRef = null; + } + + private static void DestroyPass(ref Fsr2Pass pass) + { + if (pass == null) + return; + + pass.Dispose(); + pass = null; + } + } +} diff --git a/Runtime/FSR2/Fsr2Context.cs.meta b/Runtime/FSR2/Fsr2Context.cs.meta new file mode 100644 index 0000000..ed50468 --- /dev/null +++ b/Runtime/FSR2/Fsr2Context.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 2f00ea267c3443d88bbd0e9dd7c08b4a +timeCreated: 1673442225 \ No newline at end of file diff --git a/Runtime/FSR2/Fsr2Pass.cs b/Runtime/FSR2/Fsr2Pass.cs new file mode 100644 index 0000000..415a353 --- /dev/null +++ b/Runtime/FSR2/Fsr2Pass.cs @@ -0,0 +1,387 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Profiling; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// Base class for all of the compute passes that make up the FSR2 process. + /// This loosely matches the FfxPipelineState struct from the original FSR2 codebase, wrapped in an object-oriented blanket. + /// These classes are responsible for loading compute shaders, managing temporary resources, binding resources to shader kernels and dispatching said shaders. + /// + internal abstract class Fsr2Pass: IDisposable + { + internal const int ShadingChangeMipLevel = 4; // This matches the FFX_FSR2_SHADING_CHANGE_MIP_LEVEL define + + protected readonly Fsr2.ContextDescription ContextDescription; + protected readonly Fsr2Resources Resources; + protected readonly ComputeBuffer Constants; + + protected ComputeShader ComputeShader; + protected int KernelIndex; + + protected CustomSampler Sampler; + + protected Fsr2Pass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) + { + ContextDescription = contextDescription; + Resources = resources; + Constants = constants; + } + + public virtual void Dispose() + { + } + + public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + commandBuffer.BeginSample(Sampler); + DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); + commandBuffer.EndSample(Sampler); + } + + protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); + + protected void InitComputeShader(string passName, ComputeShader shader) + { + InitComputeShader(passName, shader, ContextDescription.Flags); + } + + private void InitComputeShader(string passName, ComputeShader shader, Fsr2.InitializationFlags flags) + { + if (shader == null) + { + throw new MissingReferenceException($"Shader for FSR2 pass '{passName}' could not be loaded! Please ensure it is included in the project correctly."); + } + + ComputeShader = shader; + KernelIndex = ComputeShader.FindKernel("CS"); + Sampler = CustomSampler.Create(passName); + + bool useLut = false; +#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ + if (SystemInfo.computeSubGroupSize == 64) + { + useLut = true; + } +#endif + + // This matches the permutation rules from the CreatePipeline* functions + if ((flags & Fsr2.InitializationFlags.EnableHighDynamicRange) != 0) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_HDR_COLOR_INPUT"); + if ((flags & Fsr2.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS"); + if ((flags & Fsr2.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS"); + if ((flags & Fsr2.InitializationFlags.EnableDepthInverted) != 0) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_INVERTED_DEPTH"); + if (useLut) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"); + if ((flags & Fsr2.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFX_HALF"); + + // Inform the shader which render pipeline we're currently using + var pipeline = GraphicsSettings.currentRenderPipeline; + if (pipeline != null && pipeline.GetType().Name.Contains("HDRenderPipeline")) + { + ComputeShader.EnableKeyword("UNITY_FSR_HDRP"); + } + } + } + + internal class Fsr2ComputeLuminancePyramidPass : Fsr2Pass + { + private readonly ComputeBuffer _spdConstants; + + public Fsr2ComputeLuminancePyramidPass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants, ComputeBuffer spdConstants) + : base(contextDescription, resources, constants) + { + _spdConstants = spdConstants; + + InitComputeShader("Compute Luminance Pyramid", contextDescription.Shaders.computeLuminancePyramidPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavAutoExposure, Resources.AutoExposure); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbSpd, _spdConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2ReconstructPreviousDepthPass : Fsr2Pass + { + public Fsr2ReconstructPreviousDepthPass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Reconstruct & Dilate", contextDescription.Shaders.reconstructPreviousDepthPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + ref var depth = ref dispatchParams.Depth; + ref var motionVectors = ref dispatchParams.MotionVectors; + ref var exposure = ref dispatchParams.Exposure; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2DepthClipPass : Fsr2Pass + { + public Fsr2DepthClipPass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Depth Clip", contextDescription.Shaders.depthClipPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + ref var depth = ref dispatchParams.Depth; + ref var motionVectors = ref dispatchParams.MotionVectors; + ref var exposure = ref dispatchParams.Exposure; + ref var reactive = ref dispatchParams.Reactive; + ref var tac = ref dispatchParams.TransparencyAndComposition; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, tac.MipLevel, tac.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvReconstructedPrevNearestDepth, Fsr2ShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvDilatedDepth, Fsr2ShaderIDs.UavDilatedDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex ^ 1]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2LockPass : Fsr2Pass + { + public Fsr2LockPass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Create Locks", contextDescription.Shaders.lockPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvLockInputLuma, Fsr2ShaderIDs.UavLockInputLuma); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2AccumulatePass : Fsr2Pass + { + private const string SharpeningKeyword = "FFX_FSR2_OPTION_APPLY_SHARPENING"; + +#if UNITY_2021_2_OR_NEWER + private readonly LocalKeyword _sharpeningKeyword; +#endif + + public Fsr2AccumulatePass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Reproject & Accumulate", contextDescription.Shaders.accumulatePass); +#if UNITY_2021_2_OR_NEWER + _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); +#endif + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { +#if UNITY_2021_2_OR_NEWER + if (dispatchParams.EnableSharpening) + commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword); + else + commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword); +#else + if (dispatchParams.EnableSharpening) + commandBuffer.EnableShaderKeyword(SharpeningKeyword); + else + commandBuffer.DisableShaderKeyword(SharpeningKeyword); +#endif + + if ((ContextDescription.Flags & Fsr2.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) + { + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); + } + else + { + ref var motionVectors = ref dispatchParams.MotionVectors; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + } + + ref var exposure = ref dispatchParams.Exposure; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvDilatedReactiveMasks, Fsr2ShaderIDs.UavDilatedReactiveMasks); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvLockStatus, Resources.LockStatus[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvPreparedInputColor, Fsr2ShaderIDs.UavPreparedInputColor); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvLanczosLut, Resources.LanczosLut); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvAutoExposure, Resources.AutoExposure); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); + + ref var output = ref dispatchParams.Output; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2SharpenPass : Fsr2Pass + { + private readonly ComputeBuffer _rcasConstants; + + public Fsr2SharpenPass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants, ComputeBuffer rcasConstants) + : base(contextDescription, resources, constants) + { + _rcasConstants = rcasConstants; + + InitComputeShader("RCAS Sharpening", contextDescription.Shaders.sharpenPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); + + ref var output = ref dispatchParams.Output; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbRcas, _rcasConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr2GenerateReactivePass : Fsr2Pass + { + private readonly ComputeBuffer _generateReactiveConstants; + + public Fsr2GenerateReactivePass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer generateReactiveConstants) + : base(contextDescription, resources, null) + { + _generateReactiveConstants = generateReactiveConstants; + + InitComputeShader("Auto-Generate Reactive Mask", contextDescription.Shaders.autoGenReactivePass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + } + + public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) + { + commandBuffer.BeginSample(Sampler); + + ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; + ref var color = ref dispatchParams.ColorPreUpscale; + ref var reactive = ref dispatchParams.OutReactive; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavAutoReactive, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbGenReactive, _generateReactiveConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + + commandBuffer.EndSample(Sampler); + } + } + + internal class Fsr2TcrAutogeneratePass : Fsr2Pass + { + private readonly ComputeBuffer _tcrAutogenerateConstants; + + public Fsr2TcrAutogeneratePass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants, ComputeBuffer tcrAutogenerateConstants) + : base(contextDescription, resources, constants) + { + _tcrAutogenerateConstants = tcrAutogenerateConstants; + + InitComputeShader("Auto-Generate Transparency & Composition Mask", contextDescription.Shaders.tcrAutoGenPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + ref var motionVectors = ref dispatchParams.MotionVectors; + ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; + ref var reactive = ref dispatchParams.Reactive; + ref var tac = ref dispatchParams.TransparencyAndComposition; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvPrevColorPreAlpha, Resources.PrevPreAlpha[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvPrevColorPostAlpha, Resources.PrevPostAlpha[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, tac.MipLevel, tac.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavAutoReactive, Resources.AutoReactive); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavAutoComposition, Resources.AutoComposition); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavPrevColorPreAlpha, Resources.PrevPreAlpha[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavPrevColorPostAlpha, Resources.PrevPostAlpha[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbFsr2, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr2ShaderIDs.CbGenReactive, _tcrAutogenerateConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } +} diff --git a/Runtime/FSR2/Fsr2Pass.cs.meta b/Runtime/FSR2/Fsr2Pass.cs.meta new file mode 100644 index 0000000..06ce7e4 --- /dev/null +++ b/Runtime/FSR2/Fsr2Pass.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 14234aed75ce454183019d2e20a44d24 +timeCreated: 1676885169 \ No newline at end of file diff --git a/Runtime/FSR2/Fsr2Resources.cs b/Runtime/FSR2/Fsr2Resources.cs new file mode 100644 index 0000000..a0dde4a --- /dev/null +++ b/Runtime/FSR2/Fsr2Resources.cs @@ -0,0 +1,258 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using UnityEngine; +using UnityEngine.Experimental.Rendering; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// Helper class for bundling and managing persistent resources required by the FSR2 process. + /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames. + /// + internal class Fsr2Resources + { + public Texture2D DefaultExposure; + public Texture2D DefaultReactive; + public Texture2D LanczosLut; + public Texture2D MaximumBiasLut; + public RenderTexture SpdAtomicCounter; + public RenderTexture AutoExposure; + public RenderTexture SceneLuminance; + public RenderTexture AutoReactive; + public RenderTexture AutoComposition; + public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2]; + public readonly RenderTexture[] LockStatus = new RenderTexture[2]; + public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; + public readonly RenderTexture[] LumaHistory = new RenderTexture[2]; + public readonly RenderTexture[] PrevPreAlpha = new RenderTexture[2]; + public readonly RenderTexture[] PrevPostAlpha = new RenderTexture[2]; + + public void Create(Fsr2.ContextDescription contextDescription) + { + // Generate the data for the LUT + const int lanczos2LutWidth = 128; + float[] lanczos2Weights = new float[lanczos2LutWidth]; + for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex) + { + float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1); + float y = Fsr2.Lanczos2(x); + lanczos2Weights[currentLanczosWidthIndex] = y; + } + + float[] maximumBias = new float[MaximumBiasTextureWidth * MaximumBiasTextureHeight]; + for (int i = 0; i < maximumBias.Length; ++i) + { + maximumBias[i] = MaximumBias[i] / 2.0f; + } + + // Resource FSR3UPSCALER_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE + // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. + LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_LanczosLutData" }; + LanczosLut.SetPixelData(lanczos2Weights, 0); + LanczosLut.Apply(); + + // Resource FSR3UPSCALER_MaximumUpsampleBias: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE + MaximumBiasLut = new Texture2D(MaximumBiasTextureWidth, MaximumBiasTextureHeight, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_MaximumUpsampleBias" }; + MaximumBiasLut.SetPixelData(maximumBias, 0); + MaximumBiasLut.Apply(); + + // Resource FSR3UPSCALER_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultExposure" }; + DefaultExposure.SetPixel(0, 0, Color.clear); + DefaultExposure.Apply(); + + // Resource FSR3UPSCALER_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultReactivityMask" }; + DefaultReactive.SetPixel(0, 0, Color.clear); + DefaultReactive.Apply(); + + // Resource FSR3UPSCALER_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + // Despite what the original FSR3 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal. + SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "FSR3UPSCALER_SpdAtomicCounter", enableRandomWrite = true }; + SpdAtomicCounter.Create(); + + // Resource FSR3UPSCALER_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "FSR3UPSCALER_AutoExposure", enableRandomWrite = true }; + AutoExposure.Create(); + + // Resource FSR3UPSCALER_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. + int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2; + int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f)); + SceneLuminance = new RenderTexture(w, h, 0, GraphicsFormat.R16_SFloat, mipCount) { name = "FSR3UPSCALER_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; + SceneLuminance.Create(); + + // Resources FSR3UPSCALER_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(DilatedMotionVectors, "FSR3UPSCALER_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat); + + // Resources FSR3UPSCALER_LockStatus1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LockStatus, "FSR3UPSCALER_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat); + + // Resources FSR3UPSCALER_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(InternalUpscaled, "FSR3UPSCALER_InternalUpscaled", contextDescription.DisplaySize, GraphicsFormat.R16G16B16A16_SFloat); + + // Resources FSR3UPSCALER_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LumaHistory, "FSR3UPSCALER_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm); + } + + public void CreateTcrAutogenResources(Fsr2.ContextDescription contextDescription) + { + // Resource FSR3UPSCALER_AutoReactive: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + AutoReactive = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoReactive", enableRandomWrite = true }; + AutoReactive.Create(); + + // Resource FSR3UPSCALER_AutoComposition: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + AutoComposition = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoComposition", enableRandomWrite = true }; + AutoComposition.Create(); + + // Resources FSR3UPSCALER_PrevPreAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(PrevPreAlpha, "FSR3UPSCALER_PrevPreAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); + + // Resources FSR3UPSCALER_PrevPostAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(PrevPostAlpha, "FSR3UPSCALER_PrevPostAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); + } + + // Set up shared aliasable resources, i.e. temporary render textures + // These do not need to persist between frames, but they do need to be available between passes + public static void CreateAliasableResources(CommandBuffer commandBuffer, Fsr2.ContextDescription contextDescription, Fsr2.DispatchDescription dispatchParams) + { + Vector2Int displaySize = contextDescription.DisplaySize; + Vector2Int maxRenderSize = contextDescription.MaxRenderSize; + + // FSR3UPSCALER_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); + + // FSR3UPSCALER_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1, true); + + // FSR3UPSCALER_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + + // FSR3UPSCALER_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true); + + // FSR3UPSCALER_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16B16A16_SFloat, 1, true); + + // FSR3UPSCALER_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + } + + public static void DestroyAliasableResources(CommandBuffer commandBuffer) + { + // Release all of the aliasable resources used this frame + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavReconstructedPrevNearestDepth); + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavDilatedDepth); + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavLockInputLuma); + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavDilatedReactiveMasks); + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavPreparedInputColor); + commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavNewLocks); + } + + private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format) + { + for (int i = 0; i < 2; ++i) + { + resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true }; + resource[i].Create(); + } + } + + public void Destroy() + { + DestroyTcrAutogenResources(); + + DestroyResource(LumaHistory); + DestroyResource(InternalUpscaled); + DestroyResource(LockStatus); + DestroyResource(DilatedMotionVectors); + DestroyResource(ref SceneLuminance); + DestroyResource(ref AutoExposure); + DestroyResource(ref DefaultReactive); + DestroyResource(ref DefaultExposure); + DestroyResource(ref MaximumBiasLut); + DestroyResource(ref LanczosLut); + } + + public void DestroyTcrAutogenResources() + { + DestroyResource(PrevPostAlpha); + DestroyResource(PrevPreAlpha); + DestroyResource(ref AutoComposition); + DestroyResource(ref AutoReactive); + } + + private static void DestroyResource(ref Texture2D resource) + { + if (resource == null) + return; + +#if UNITY_EDITOR + if (Application.isPlaying && !UnityEditor.EditorApplication.isPaused) + UnityEngine.Object.Destroy(resource); + else + UnityEngine.Object.DestroyImmediate(resource); +#else + UnityEngine.Object.Destroy(resource); +#endif + resource = null; + } + + private static void DestroyResource(ref RenderTexture resource) + { + if (resource == null) + return; + + resource.Release(); + resource = null; + } + + private static void DestroyResource(RenderTexture[] resource) + { + for (int i = 0; i < resource.Length; ++i) + DestroyResource(ref resource[i]); + } + + private const int MaximumBiasTextureWidth = 16; + private const int MaximumBiasTextureHeight = 16; + private static readonly float[] MaximumBias = + { + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.976f, 1.841f, 1.774f, 1.737f, 1.716f, 1.71f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.914f, 1.784f, 1.716f, 1.673f, 1.649f, 1.641f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.793f, 1.676f, 1.604f, 1.562f, 1.54f, 1.533f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.619f, 1.536f, 1.492f, 1.467f, 1.454f, 1.449f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.575f, 1.496f, 1.456f, 1.432f, 1.416f, 1.408f, 1.405f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.555f, 1.479f, 1.438f, 1.413f, 1.398f, 1.387f, 1.381f, 1.379f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.812f, 1.555f, 1.474f, 1.43f, 1.404f, 1.387f, 1.376f, 1.368f, 1.363f, 1.362f, + 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.802f, 1.575f, 1.479f, 1.43f, 1.401f, 1.382f, 1.369f, 1.36f, 1.354f, 1.351f, 1.35f, + 2.0f, 2.0f, 1.976f, 1.914f, 1.793f, 1.619f, 1.496f, 1.438f, 1.404f, 1.382f, 1.367f, 1.357f, 1.349f, 1.344f, 1.341f, 1.34f, + 1.876f, 1.869f, 1.841f, 1.784f, 1.676f, 1.536f, 1.456f, 1.413f, 1.387f, 1.369f, 1.357f, 1.347f, 1.341f, 1.336f, 1.333f, 1.332f, + 1.809f, 1.801f, 1.774f, 1.716f, 1.604f, 1.492f, 1.432f, 1.398f, 1.376f, 1.36f, 1.349f, 1.341f, 1.335f, 1.33f, 1.328f, 1.327f, + 1.772f, 1.764f, 1.737f, 1.673f, 1.562f, 1.467f, 1.416f, 1.387f, 1.368f, 1.354f, 1.344f, 1.336f, 1.33f, 1.326f, 1.323f, 1.323f, + 1.753f, 1.745f, 1.716f, 1.649f, 1.54f, 1.454f, 1.408f, 1.381f, 1.363f, 1.351f, 1.341f, 1.333f, 1.328f, 1.323f, 1.321f, 1.32f, + 1.748f, 1.739f, 1.71f, 1.641f, 1.533f, 1.449f, 1.405f, 1.379f, 1.362f, 1.35f, 1.34f, 1.332f, 1.327f, 1.323f, 1.32f, 1.319f, + }; + } +} diff --git a/Runtime/FSR2/Fsr2Resources.cs.meta b/Runtime/FSR2/Fsr2Resources.cs.meta new file mode 100644 index 0000000..0e993b4 --- /dev/null +++ b/Runtime/FSR2/Fsr2Resources.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 22ad49193f244ab4a36d0a1512f3015f +timeCreated: 1677236102 \ No newline at end of file diff --git a/Runtime/FSR2/Fsr2ShaderIDs.cs b/Runtime/FSR2/Fsr2ShaderIDs.cs new file mode 100644 index 0000000..5b8fa4b --- /dev/null +++ b/Runtime/FSR2/Fsr2ShaderIDs.cs @@ -0,0 +1,80 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace FidelityFX +{ + public static class Fsr2ShaderIDs + { + // Shader resource views, i.e. read-only bindings + public static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered"); + public static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only"); + public static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors"); + public static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth"); + public static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure"); + public static readonly int SrvAutoExposure = Shader.PropertyToID("r_auto_exposure"); + public static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask"); + public static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask"); + public static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth"); + public static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors"); + public static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors"); + public static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilatedDepth"); + public static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color"); + public static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status"); + public static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma"); + public static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color"); + public static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history"); + public static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input"); + public static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut"); + public static readonly int SrvSceneLuminanceMips = Shader.PropertyToID("r_imgMips"); + public static readonly int SrvUpscaleMaximumBiasLut = Shader.PropertyToID("r_upsample_maximum_bias_lut"); + public static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks"); + public static readonly int SrvPrevColorPreAlpha = Shader.PropertyToID("r_input_prev_color_pre_alpha"); + public static readonly int SrvPrevColorPostAlpha = Shader.PropertyToID("r_input_prev_color_post_alpha"); + + // Unordered access views, i.e. random read/write bindings + public static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth"); + public static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors"); + public static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilatedDepth"); + public static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color"); + public static readonly int UavLockStatus = Shader.PropertyToID("rw_lock_status"); + public static readonly int UavLockInputLuma = Shader.PropertyToID("rw_lock_input_luma"); + public static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks"); + public static readonly int UavPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color"); + public static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history"); + public static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output"); + public static readonly int UavExposureMipLumaChange = Shader.PropertyToID("rw_img_mip_shading_change"); + public static readonly int UavExposureMip5 = Shader.PropertyToID("rw_img_mip_5"); + public static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks"); + public static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure"); + public static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic"); + public static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive"); + public static readonly int UavAutoComposition = Shader.PropertyToID("rw_output_autocomposition"); + public static readonly int UavPrevColorPreAlpha = Shader.PropertyToID("rw_output_prev_color_pre_alpha"); + public static readonly int UavPrevColorPostAlpha = Shader.PropertyToID("rw_output_prev_color_post_alpha"); + + // Constant buffer bindings + public static readonly int CbFsr2 = Shader.PropertyToID("cbFSR2"); + public static readonly int CbSpd = Shader.PropertyToID("cbSPD"); + public static readonly int CbRcas = Shader.PropertyToID("cbRCAS"); + public static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive"); + } +} diff --git a/Runtime/FSR2/Fsr2ShaderIDs.cs.meta b/Runtime/FSR2/Fsr2ShaderIDs.cs.meta new file mode 100644 index 0000000..ef83135 --- /dev/null +++ b/Runtime/FSR2/Fsr2ShaderIDs.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: ad1eca9398174f6c85d8bc01d10993df +timeCreated: 1679060863 \ No newline at end of file diff --git a/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef b/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef new file mode 100644 index 0000000..85f2869 --- /dev/null +++ b/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef @@ -0,0 +1,16 @@ +{ + "name": "fidelityfx.fsr.fsr2", + "rootNamespace": "FidelityFX", + "references": [ + "GUID:f355c2e9d97d4d8438e24fa268b0f6a5" + ], + "includePlatforms": [], + "excludePlatforms": [], + "allowUnsafeCode": false, + "overrideReferences": false, + "precompiledReferences": [], + "autoReferenced": true, + "defineConstraints": [], + "versionDefines": [], + "noEngineReferences": false +} \ No newline at end of file diff --git a/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef.meta b/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef.meta new file mode 100644 index 0000000..b096c17 --- /dev/null +++ b/Runtime/FSR2/fidelityfx.fsr.fsr2.asmdef.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 8d3d81074ae24444fab654e0a84c9f8d +AssemblyDefinitionImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR3.meta b/Runtime/FSR3.meta new file mode 100644 index 0000000..5399ea1 --- /dev/null +++ b/Runtime/FSR3.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 16d16ad51c3bc2a429d3da6788f65e07 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR3/Fsr3ShaderIDs.cs b/Runtime/FSR3/Fsr3ShaderIDs.cs new file mode 100644 index 0000000..992e3e6 --- /dev/null +++ b/Runtime/FSR3/Fsr3ShaderIDs.cs @@ -0,0 +1,90 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace FidelityFX +{ + public static class Fsr3ShaderIDs + { + // Shader resource views, i.e. read-only bindings + public static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered"); + public static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only"); + public static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors"); + public static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth"); + public static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure"); + public static readonly int SrvFrameInfo = Shader.PropertyToID("r_frame_info"); + public static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask"); + public static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask"); + public static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth"); + public static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors"); + public static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilated_depth"); + public static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color"); + public static readonly int SrvAccumulation = Shader.PropertyToID("r_accumulation"); + public static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history"); + public static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input"); + public static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut"); + public static readonly int SrvSpdMips = Shader.PropertyToID("r_spd_mips"); + public static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks"); + public static readonly int SrvNewLocks = Shader.PropertyToID("r_new_locks"); + public static readonly int SrvFarthestDepth = Shader.PropertyToID("r_farthest_depth"); + public static readonly int SrvFarthestDepthMip1 = Shader.PropertyToID("r_farthest_depth_mip1"); + public static readonly int SrvShadingChange = Shader.PropertyToID("r_shading_change"); + public static readonly int SrvCurrentLuma = Shader.PropertyToID("r_current_luma"); + public static readonly int SrvPreviousLuma = Shader.PropertyToID("r_previous_luma"); + public static readonly int SrvLumaInstability = Shader.PropertyToID("r_luma_instability"); + public static readonly int SrvPrevColorPreAlpha = Shader.PropertyToID("r_input_prev_color_pre_alpha"); + public static readonly int SrvPrevColorPostAlpha = Shader.PropertyToID("r_input_prev_color_post_alpha"); + + // Unordered access views, i.e. random read/write bindings + public static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth"); + public static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors"); + public static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilated_depth"); + public static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color"); + public static readonly int UavAccumulation = Shader.PropertyToID("rw_accumulation"); + public static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history"); + public static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output"); + public static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks"); + public static readonly int UavFrameInfo = Shader.PropertyToID("rw_frame_info"); + public static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic"); + public static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks"); + public static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive"); + public static readonly int UavShadingChange = Shader.PropertyToID("rw_shading_change"); + public static readonly int UavFarthestDepth = Shader.PropertyToID("rw_farthest_depth"); + public static readonly int UavFarthestDepthMip1 = Shader.PropertyToID("rw_farthest_depth_mip1"); + public static readonly int UavCurrentLuma = Shader.PropertyToID("rw_current_luma"); + public static readonly int UavLumaInstability = Shader.PropertyToID("rw_luma_instability"); + public static readonly int UavSpdMip0 = Shader.PropertyToID("rw_spd_mip0"); + public static readonly int UavSpdMip1 = Shader.PropertyToID("rw_spd_mip1"); + public static readonly int UavSpdMip2 = Shader.PropertyToID("rw_spd_mip2"); + public static readonly int UavSpdMip3 = Shader.PropertyToID("rw_spd_mip3"); + public static readonly int UavSpdMip4 = Shader.PropertyToID("rw_spd_mip4"); + public static readonly int UavSpdMip5 = Shader.PropertyToID("rw_spd_mip5"); + public static readonly int UavAutoComposition = Shader.PropertyToID("rw_output_autocomposition"); + public static readonly int UavPrevColorPreAlpha = Shader.PropertyToID("rw_output_prev_color_pre_alpha"); + public static readonly int UavPrevColorPostAlpha = Shader.PropertyToID("rw_output_prev_color_post_alpha"); + + // Constant buffer bindings + public static readonly int CbFsr3Upscaler = Shader.PropertyToID("cbFSR3Upscaler"); + public static readonly int CbSpd = Shader.PropertyToID("cbSPD"); + public static readonly int CbRcas = Shader.PropertyToID("cbRCAS"); + public static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive"); + } +} diff --git a/Runtime/FSR3/Fsr3ShaderIDs.cs.meta b/Runtime/FSR3/Fsr3ShaderIDs.cs.meta new file mode 100644 index 0000000..734aedb --- /dev/null +++ b/Runtime/FSR3/Fsr3ShaderIDs.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: a1e3e7c189594b94897510b4a20b8a00 +timeCreated: 1679060863 \ No newline at end of file diff --git a/Runtime/FSR3/Fsr3Upscaler.cs b/Runtime/FSR3/Fsr3Upscaler.cs new file mode 100644 index 0000000..313c6cf --- /dev/null +++ b/Runtime/FSR3/Fsr3Upscaler.cs @@ -0,0 +1,312 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// A collection of helper functions and data structures required by the FSR3 Upscaler process. + /// + public static class Fsr3Upscaler + { + /// + /// Creates a new FSR3 Upscaler context with standard parameters that are appropriate for the current platform. + /// + public static Fsr3UpscalerContext CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, Fsr3UpscalerShaders shaders, InitializationFlags flags = 0) + { + if (SystemInfo.usesReversedZBuffer) + flags |= InitializationFlags.EnableDepthInverted; + else + flags &= ~InitializationFlags.EnableDepthInverted; + +#if UNITY_EDITOR || DEVELOPMENT_BUILD + flags |= InitializationFlags.EnableDebugChecking; +#endif + + Debug.Log($"Setting up FSR3 Upscaler with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}"); + + var contextDescription = new ContextDescription + { + Flags = flags, + MaxUpscaleSize = displaySize, + MaxRenderSize = maxRenderSize, + Shaders = shaders, + }; + + var context = new Fsr3UpscalerContext(); + context.Create(contextDescription); + return context; + } + + public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode) + { + switch (qualityMode) + { + case QualityMode.NativeAA: + return 1.0f; + case QualityMode.UltraQuality: + return 1.2f; + case QualityMode.Quality: + return 1.5f; + case QualityMode.Balanced: + return 1.7f; + case QualityMode.Performance: + return 2.0f; + case QualityMode.UltraPerformance: + return 3.0f; + default: + return 1.0f; + } + } + + public static void GetRenderResolutionFromQualityMode( + out int renderWidth, out int renderHeight, + int displayWidth, int displayHeight, QualityMode qualityMode) + { + float ratio = GetUpscaleRatioFromQualityMode(qualityMode); + renderWidth = Mathf.RoundToInt(displayWidth / ratio); + renderHeight = Mathf.RoundToInt(displayHeight / ratio); + } + + public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) + { + return Mathf.Log((float)renderWidth / displayWidth, 2.0f) - 1.0f; + } + + public static int GetJitterPhaseCount(int renderWidth, int displayWidth) + { + const float basePhaseCount = 8.0f; + int jitterPhaseCount = (int)(basePhaseCount * Mathf.Pow((float)displayWidth / renderWidth, 2.0f)); + return jitterPhaseCount; + } + + public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount) + { + outX = Halton((index % phaseCount) + 1, 2) - 0.5f; + outY = Halton((index % phaseCount) + 1, 3) - 0.5f; + } + + // Calculate halton number for index and base. + private static float Halton(int index, int @base) + { + float f = 1.0f, result = 0.0f; + + for (int currentIndex = index; currentIndex > 0;) { + + f /= @base; + result += f * (currentIndex % @base); + currentIndex = (int)Mathf.Floor((float)currentIndex / @base); + } + + return result; + } + + public static float Lanczos2(float value) + { + return Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value)); + } + +#if !UNITY_2021_1_OR_NEWER + internal static void SetBufferData(this CommandBuffer commandBuffer, ComputeBuffer computeBuffer, Array data) + { + commandBuffer.SetComputeBufferData(computeBuffer, data); + } +#endif + + public enum QualityMode + { + NativeAA = 0, + UltraQuality = 1, + Quality = 2, + Balanced = 3, + Performance = 4, + UltraPerformance = 5, + } + + [Flags] + public enum InitializationFlags + { + EnableHighDynamicRange = 1 << 0, + EnableDisplayResolutionMotionVectors = 1 << 1, + EnableMotionVectorsJitterCancellation = 1 << 2, + EnableDepthInverted = 1 << 3, + EnableDepthInfinite = 1 << 4, + EnableAutoExposure = 1 << 5, + EnableDynamicResolution = 1 << 6, + EnableFP16Usage = 1 << 7, + EnableDebugChecking = 1 << 8, + } + + [Flags] + public enum DispatchFlags + { + DrawDebugView = 1 << 0, + } + + /// + /// A structure encapsulating the parameters required to initialize FidelityFX Super Resolution 3 upscaling. + /// + public struct ContextDescription + { + public InitializationFlags Flags; + public Vector2Int MaxRenderSize; + public Vector2Int MaxUpscaleSize; + public Fsr3UpscalerShaders Shaders; + } + + /// + /// A structure encapsulating the parameters for dispatching the various passes of FidelityFX Super Resolution 3. + /// + public class DispatchDescription + { + public ResourceView Color; + public ResourceView Depth; + public ResourceView MotionVectors; + public ResourceView Exposure; // optional + public ResourceView Reactive; // optional + public ResourceView TransparencyAndComposition; // optional + public ResourceView Output; + public Vector2 JitterOffset; + public Vector2 MotionVectorScale; + public Vector2Int RenderSize; + public Vector2Int UpscaleSize; + public bool EnableSharpening; + public float Sharpness; + public float FrameTimeDelta; // in seconds + public float PreExposure; + public bool Reset; + public float CameraNear; + public float CameraFar; + public float CameraFovAngleVertical; + public float ViewSpaceToMetersFactor; + public DispatchFlags Flags; + + // EXPERIMENTAL reactive mask generation parameters + public bool EnableAutoReactive; + public ResourceView ColorOpaqueOnly; + public float AutoTcThreshold = 0.05f; + public float AutoTcScale = 1.0f; + public float AutoReactiveScale = 5.0f; + public float AutoReactiveMax = 0.9f; + } + + /// + /// A structure encapsulating the parameters for automatic generation of a reactive mask. + /// The default values for Scale, CutoffThreshold, BinaryValue and Flags were taken from the FSR3 demo project. + /// + public class GenerateReactiveDescription + { + public ResourceView ColorOpaqueOnly; + public ResourceView ColorPreUpscale; + public ResourceView OutReactive; + public Vector2Int RenderSize; + public float Scale = 0.5f; + public float CutoffThreshold = 0.2f; + public float BinaryValue = 0.9f; + public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax; + } + + [Flags] + public enum GenerateReactiveFlags + { + ApplyTonemap = 1 << 0, + ApplyInverseTonemap = 1 << 1, + ApplyThreshold = 1 << 2, + UseComponentsMax = 1 << 3, + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct UpscalerConstants + { + public Vector2Int renderSize; + public Vector2Int previousFrameRenderSize; + + public Vector2Int upscaleSize; + public Vector2Int previousFrameUpscaleSize; + + public Vector2Int maxRenderSize; + public Vector2Int maxUpscaleSize; + + public Vector4 deviceToViewDepth; + + public Vector2 jitterOffset; + public Vector2 previousFrameJitterOffset; + + public Vector2 motionVectorScale; + public Vector2 downscaleFactor; + + public Vector2 motionVectorJitterCancellation; + public float tanHalfFOV; + public float jitterPhaseCount; + + public float deltaTime; + public float deltaPreExposure; + public float viewSpaceToMetersFactor; + public float frameIndex; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct SpdConstants + { + public uint mips; + public uint numWorkGroups; + public uint workGroupOffsetX, workGroupOffsetY; + public uint renderSizeX, renderSizeY; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants + { + public float scale; + public float threshold; + public float binaryValue; + public uint flags; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct GenerateReactiveConstants2 + { + public float autoTcThreshold; + public float autoTcScale; + public float autoReactiveScale; + public float autoReactiveMax; + } + + [Serializable, StructLayout(LayoutKind.Sequential)] + internal struct RcasConstants + { + public RcasConstants(uint sharpness, uint halfSharp) + { + this.sharpness = sharpness; + this.halfSharp = halfSharp; + dummy0 = dummy1 = 0; + } + + public readonly uint sharpness; + public readonly uint halfSharp; + public readonly uint dummy0; + public readonly uint dummy1; + } + } +} diff --git a/Runtime/FSR3/Fsr3Upscaler.cs.meta b/Runtime/FSR3/Fsr3Upscaler.cs.meta new file mode 100644 index 0000000..1c62899 --- /dev/null +++ b/Runtime/FSR3/Fsr3Upscaler.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 7ee72b891c35d614eac306ca6154b66d +timeCreated: 1673441954 \ No newline at end of file diff --git a/Runtime/FSR3/Fsr3UpscalerAssets.cs b/Runtime/FSR3/Fsr3UpscalerAssets.cs new file mode 100644 index 0000000..c77a998 --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerAssets.cs @@ -0,0 +1,177 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; +using UnityEngine.Serialization; + +namespace FidelityFX +{ + /// + /// Scriptable object containing all shader resources required by FidelityFX Super Resolution 3 (FSR3) Upscaler. + /// These can be stored in an asset file and referenced from a scene or prefab, avoiding the need to load the shaders from a Resources folder. + /// + [CreateAssetMenu(fileName = "FSR3 Upscaler Assets", menuName = "FidelityFX/FSR3 Upscaler Assets", order = 1103)] + public class Fsr3UpscalerAssets : ScriptableObject + { + public Fsr3UpscalerShaders shaders; + +#if UNITY_EDITOR + private void Reset() + { + shaders = new Fsr3UpscalerShaders + { + prepareInputsPass = FindComputeShader("ffx_fsr3upscaler_prepare_inputs_pass"), + lumaPyramidPass = FindComputeShader("ffx_fsr3upscaler_luma_pyramid_pass"), + shadingChangePyramidPass = FindComputeShader("ffx_fsr3upscaler_shading_change_pyramid_pass"), + shadingChangePass = FindComputeShader("ffx_fsr3upscaler_shading_change_pass"), + prepareReactivityPass = FindComputeShader("ffx_fsr3upscaler_prepare_reactivity_pass"), + lumaInstabilityPass = FindComputeShader("ffx_fsr3upscaler_luma_instability_pass"), + accumulatePass = FindComputeShader("ffx_fsr3upscaler_accumulate_pass"), + sharpenPass = FindComputeShader("ffx_fsr3upscaler_rcas_pass"), + autoGenReactivePass = FindComputeShader("ffx_fsr3upscaler_autogen_reactive_pass"), + tcrAutoGenPass = FindComputeShader("ffx_fsr3upscaler_tcr_autogen_pass"), + debugViewPass = FindComputeShader("ffx_fsr3upscaler_debug_view_pass"), + }; + } + + private static ComputeShader FindComputeShader(string name) + { + string[] assetGuids = UnityEditor.AssetDatabase.FindAssets($"t:ComputeShader {name}"); + if (assetGuids == null || assetGuids.Length == 0) + return null; + + string assetPath = UnityEditor.AssetDatabase.GUIDToAssetPath(assetGuids[0]); + return UnityEditor.AssetDatabase.LoadAssetAtPath(assetPath); + } +#endif + } + + /// + /// All the compute shaders used by the FSR3 Upscaler. + /// + [System.Serializable] + public class Fsr3UpscalerShaders + { + /// + /// The compute shader used by the prepare inputs pass. + /// + public ComputeShader prepareInputsPass; + + /// + /// The compute shader used by the luminance pyramid computation pass. + /// + public ComputeShader lumaPyramidPass; + + /// + /// The compute shader used by the shading change pyramid pass. + /// + public ComputeShader shadingChangePyramidPass; + + /// + /// The compute shader used by the shading change pass. + /// + public ComputeShader shadingChangePass; + + /// + /// The compute shader used by the prepare reactivity pass. + /// + public ComputeShader prepareReactivityPass; + + /// + /// The compute shader used by the luma instability pass. + /// + public ComputeShader lumaInstabilityPass; + + /// + /// The compute shader used by the accumulation pass. + /// + public ComputeShader accumulatePass; + + /// + /// The compute shader used by the RCAS sharpening pass. + /// + public ComputeShader sharpenPass; + + /// + /// The compute shader used to auto-generate a reactive mask. + /// + public ComputeShader autoGenReactivePass; + + /// + /// The compute shader used to auto-generate a transparency & composition mask. + /// + public ComputeShader tcrAutoGenPass; + + /// + /// The compute shader used to display a debug view. + /// + public ComputeShader debugViewPass; + + /// + /// Returns a copy of this class and its contents. + /// + public Fsr3UpscalerShaders Clone() + { + return (Fsr3UpscalerShaders)MemberwiseClone(); + } + + /// + /// Returns a copy of this class with clones of all its shaders. + /// This can be useful if you're running multiple FSR3 Upscaler instances with different shader configurations. + /// Be sure to clean up these clones through Dispose once you're done with them. + /// + public Fsr3UpscalerShaders DeepCopy() + { + return new Fsr3UpscalerShaders + { + prepareInputsPass = Object.Instantiate(prepareInputsPass), + lumaPyramidPass = Object.Instantiate(lumaPyramidPass), + shadingChangePyramidPass = Object.Instantiate(shadingChangePyramidPass), + shadingChangePass = Object.Instantiate(shadingChangePass), + prepareReactivityPass = Object.Instantiate(prepareReactivityPass), + lumaInstabilityPass = Object.Instantiate(lumaInstabilityPass), + accumulatePass = Object.Instantiate(accumulatePass), + sharpenPass = Object.Instantiate(sharpenPass), + autoGenReactivePass = Object.Instantiate(autoGenReactivePass), + tcrAutoGenPass = Object.Instantiate(tcrAutoGenPass), + debugViewPass = Object.Instantiate(debugViewPass), + }; + } + + /// + /// Destroy all the shaders within this instance. + /// Use this only on clones created through DeepCopy. + /// + public void Dispose() + { + Object.Destroy(prepareInputsPass); + Object.Destroy(lumaPyramidPass); + Object.Destroy(shadingChangePyramidPass); + Object.Destroy(shadingChangePass); + Object.Destroy(prepareReactivityPass); + Object.Destroy(lumaInstabilityPass); + Object.Destroy(accumulatePass); + Object.Destroy(sharpenPass); + Object.Destroy(autoGenReactivePass); + Object.Destroy(tcrAutoGenPass); + Object.Destroy(debugViewPass); + } + } +} diff --git a/Runtime/FSR3/Fsr3UpscalerAssets.cs.meta b/Runtime/FSR3/Fsr3UpscalerAssets.cs.meta new file mode 100644 index 0000000..7c8565b --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerAssets.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 2dbcc608a4754d049a14a0bcce2eb40b +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR3/Fsr3UpscalerCallbacks.cs b/Runtime/FSR3/Fsr3UpscalerCallbacks.cs new file mode 100644 index 0000000..e91d358 --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerCallbacks.cs @@ -0,0 +1,81 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using UnityEngine; + +namespace FidelityFX +{ + /// + /// A collection of callbacks required by the FSR3 Upscaler process. + /// This allows some customization by the game dev on how to integrate FSR3 upscaling into their own game setup. + /// + public interface IFsr3UpscalerCallbacks + { + /// + /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers. + /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures. + /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays. + /// + /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. + /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand. + /// + void ApplyMipmapBias(float biasOffset); + + void UndoMipmapBias(); + } + + /// + /// Default implementation of IFsr3UpscalerCallbacks. + /// These are fine for testing but a proper game will want to extend and override these methods. + /// + public class Fsr3UpscalerCallbacksBase: IFsr3UpscalerCallbacks + { + protected float CurrentBiasOffset = 0; + + public virtual void ApplyMipmapBias(float biasOffset) + { + if (float.IsNaN(biasOffset) || float.IsInfinity(biasOffset)) + return; + + CurrentBiasOffset += biasOffset; + + if (Mathf.Approximately(CurrentBiasOffset, 0f)) + { + CurrentBiasOffset = 0f; + } + + foreach (var texture in Resources.FindObjectsOfTypeAll()) + { + if (texture.mipmapCount <= 1) + continue; + + texture.mipMapBias += biasOffset; + } + } + + public virtual void UndoMipmapBias() + { + if (CurrentBiasOffset == 0f) + return; + + ApplyMipmapBias(-CurrentBiasOffset); + } + } +} diff --git a/Runtime/FSR3/Fsr3UpscalerCallbacks.cs.meta b/Runtime/FSR3/Fsr3UpscalerCallbacks.cs.meta new file mode 100644 index 0000000..d68162b --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerCallbacks.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: d52448255f58a6a42bfcf1634f4dd1a8 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/FSR3/Fsr3UpscalerContext.cs b/Runtime/FSR3/Fsr3UpscalerContext.cs new file mode 100644 index 0000000..3dccb54 --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerContext.cs @@ -0,0 +1,652 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// This class loosely matches the FfxFsr3UpscalerContext struct from the original FSR3 codebase. + /// It manages the various resources and compute passes required by the FSR3 Upscaler process. + /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers. + /// This should make it suitable for integration with any of the available Unity render pipelines. + /// + public class Fsr3UpscalerContext + { + private const int MaxQueuedFrames = 16; + + private Fsr3Upscaler.ContextDescription _contextDescription; + private CommandBuffer _commandBuffer; + + private Fsr3UpscalerPass _prepareInputsPass; + private Fsr3UpscalerPass _lumaPyramidPass; + private Fsr3UpscalerPass _shadingChangePyramidPass; + private Fsr3UpscalerPass _shadingChangePass; + private Fsr3UpscalerPass _prepareReactivityPass; + private Fsr3UpscalerPass _lumaInstabilityPass; + private Fsr3UpscalerPass _accumulatePass; + private Fsr3UpscalerPass _sharpenPass; + private Fsr3UpscalerPass _generateReactivePass; + private Fsr3UpscalerPass _tcrAutogeneratePass; +#if UNITY_EDITOR || DEVELOPMENT_BUILD + private Fsr3UpscalerPass _debugViewPass; +#endif + + private readonly Fsr3UpscalerResources _resources = new Fsr3UpscalerResources(); + + private ComputeBuffer _upscalerConstantsBuffer; + private readonly Fsr3Upscaler.UpscalerConstants[] _upscalerConstantsArray = { new Fsr3Upscaler.UpscalerConstants() }; + private ref Fsr3Upscaler.UpscalerConstants UpscalerConsts => ref _upscalerConstantsArray[0]; + + private ComputeBuffer _spdConstantsBuffer; + private readonly Fsr3Upscaler.SpdConstants[] _spdConstantsArray = { new Fsr3Upscaler.SpdConstants() }; + private ref Fsr3Upscaler.SpdConstants SpdConsts => ref _spdConstantsArray[0]; + + private ComputeBuffer _rcasConstantsBuffer; + private readonly Fsr3Upscaler.RcasConstants[] _rcasConstantsArray = new Fsr3Upscaler.RcasConstants[1]; + private ref Fsr3Upscaler.RcasConstants RcasConsts => ref _rcasConstantsArray[0]; + + private ComputeBuffer _generateReactiveConstantsBuffer; + private readonly Fsr3Upscaler.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Fsr3Upscaler.GenerateReactiveConstants() }; + private ref Fsr3Upscaler.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0]; + + private ComputeBuffer _tcrAutogenerateConstantsBuffer; + private readonly Fsr3Upscaler.GenerateReactiveConstants2[] _tcrAutogenerateConstantsArray = { new Fsr3Upscaler.GenerateReactiveConstants2() }; + private ref Fsr3Upscaler.GenerateReactiveConstants2 TcrAutoGenConsts => ref _tcrAutogenerateConstantsArray[0]; + + private bool _firstExecution; + private int _resourceFrameIndex; + private Vector2 _previousJitterOffset; + private float _preExposure; + private float _previousFramePreExposure; + + public void Create(Fsr3Upscaler.ContextDescription contextDescription) + { + _contextDescription = contextDescription; + _commandBuffer = new CommandBuffer { name = "FSR3 Upscaler" }; + + _upscalerConstantsBuffer = CreateConstantBuffer(); + _spdConstantsBuffer = CreateConstantBuffer(); + _rcasConstantsBuffer = CreateConstantBuffer(); + _generateReactiveConstantsBuffer = CreateConstantBuffer(); + _tcrAutogenerateConstantsBuffer = CreateConstantBuffer(); + + // Set defaults + _firstExecution = true; + _resourceFrameIndex = 0; + + UpscalerConsts.maxUpscaleSize = _contextDescription.MaxUpscaleSize; + + _resources.Create(_contextDescription); + CreatePasses(); + } + + private void CreatePasses() + { + _prepareInputsPass = new Fsr3UpscalerPrepareInputsPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _lumaPyramidPass = new Fsr3UpscalerLumaPyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer); + _shadingChangePyramidPass = new Fsr3UpscalerShadingChangePyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer); + _shadingChangePass = new Fsr3UpscalerShadingChangePass(_contextDescription, _resources, _upscalerConstantsBuffer); + _prepareReactivityPass = new Fsr3UpscalerPrepareReactivityPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _lumaInstabilityPass = new Fsr3UpscalerLumaInstabilityPass(_contextDescription, _resources, _upscalerConstantsBuffer); + _accumulatePass = new Fsr3UpscalerAccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer); + _sharpenPass = new Fsr3UpscalerSharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer); + _generateReactivePass = new Fsr3UpscalerGenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer); + _tcrAutogeneratePass = new Fsr3UpscalerTcrAutogeneratePass(_contextDescription, _resources, _upscalerConstantsBuffer, _tcrAutogenerateConstantsBuffer); +#if UNITY_EDITOR || DEVELOPMENT_BUILD + _debugViewPass = new Fsr3UpscalerDebugViewPass(_contextDescription, _resources, _upscalerConstantsBuffer); +#endif + } + + public void Destroy() + { +#if UNITY_EDITOR || DEVELOPMENT_BUILD + DestroyPass(ref _debugViewPass); +#endif + DestroyPass(ref _tcrAutogeneratePass); + DestroyPass(ref _generateReactivePass); + DestroyPass(ref _lumaPyramidPass); + DestroyPass(ref _sharpenPass); + DestroyPass(ref _accumulatePass); + DestroyPass(ref _prepareReactivityPass); + DestroyPass(ref _shadingChangePass); + DestroyPass(ref _shadingChangePyramidPass); + DestroyPass(ref _lumaInstabilityPass); + DestroyPass(ref _prepareInputsPass); + + _resources.Destroy(); + + DestroyConstantBuffer(ref _tcrAutogenerateConstantsBuffer); + DestroyConstantBuffer(ref _generateReactiveConstantsBuffer); + DestroyConstantBuffer(ref _rcasConstantsBuffer); + DestroyConstantBuffer(ref _spdConstantsBuffer); + DestroyConstantBuffer(ref _upscalerConstantsBuffer); + + if (_commandBuffer != null) + { + _commandBuffer.Dispose(); + _commandBuffer = null; + } + } + + public void Dispatch(Fsr3Upscaler.DispatchDescription dispatchParams) + { + _commandBuffer.Clear(); + Dispatch(dispatchParams, _commandBuffer); + Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void Dispatch(Fsr3Upscaler.DispatchDescription dispatchParams, CommandBuffer commandBuffer) + { + if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDebugChecking) != 0) + { + DebugCheckDispatch(dispatchParams); + } + + if (_firstExecution) + { + commandBuffer.SetRenderTarget(_resources.Accumulation[0]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(_resources.Accumulation[1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(_resources.Luma[0]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + commandBuffer.SetRenderTarget(_resources.Luma[1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + int frameIndex = _resourceFrameIndex % 2; + bool resetAccumulation = dispatchParams.Reset || _firstExecution; + _firstExecution = false; + + // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends + if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableAutoExposure) != 0) + dispatchParams.Exposure = new ResourceView(_resources.FrameInfo); + else if (!dispatchParams.Exposure.IsValid) + dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure); + + if (dispatchParams.EnableAutoReactive) + { + // Create the auto-TCR resources only when we need them + if (_resources.AutoReactive == null) + _resources.CreateTcrAutogenResources(_contextDescription); + + if (resetAccumulation) + { + RenderTargetIdentifier opaqueOnly = dispatchParams.ColorOpaqueOnly.IsValid ? dispatchParams.ColorOpaqueOnly.RenderTarget : Fsr3ShaderIDs.SrvOpaqueOnly; + commandBuffer.Blit(_resources.PrevPreAlpha[frameIndex ^ 1], opaqueOnly); + } + } + else if (_resources.AutoReactive != null) + { + // Destroy the auto-TCR resources if we don't use the feature + _resources.DestroyTcrAutogenResources(); + } + + if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive); + if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive); + Fsr3UpscalerResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams); + + SetupConstants(dispatchParams, resetAccumulation); + + // Reactive mask bias + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstX = (UpscalerConsts.upscaleSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchDstY = (UpscalerConsts.upscaleSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchShadingChangePassX = ((UpscalerConsts.renderSize.x / 2) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchShadingChangePassY = ((UpscalerConsts.renderSize.y / 2) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + // Clear reconstructed depth for max depth store + if (resetAccumulation) + { + commandBuffer.SetRenderTarget(_resources.Accumulation[frameIndex ^ 1]); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + commandBuffer.SetRenderTarget(_resources.SpdMips); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + + // Auto exposure always used to track luma changes in locking logic + commandBuffer.SetRenderTarget(_resources.FrameInfo); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + + // Reset atomic counter to 0 + commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); + commandBuffer.ClearRenderTarget(false, true, Color.clear); + } + + // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option + bool depthInverted = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) == Fsr3Upscaler.InitializationFlags.EnableDepthInverted; + commandBuffer.SetRenderTarget(_resources.ReconstructedPrevNearestDepth); + commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white); + + // Auto exposure + SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount); + + // Initialize constant buffers data + commandBuffer.SetBufferData(_upscalerConstantsBuffer, _upscalerConstantsArray); + commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray); + + // Auto reactive + if (dispatchParams.EnableAutoReactive) + { + GenerateTransparencyCompositionReactive(dispatchParams, commandBuffer, frameIndex); + dispatchParams.Reactive = new ResourceView(_resources.AutoReactive); + dispatchParams.TransparencyAndComposition = new ResourceView(_resources.AutoComposition); + } + + _prepareInputsPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + _lumaPyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y); + _shadingChangePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y); + _shadingChangePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchShadingChangePassX, dispatchShadingChangePassY); + _prepareReactivityPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + _lumaInstabilityPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + + _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); + + if (dispatchParams.EnableSharpening) + { + // Compute the constants + SetupRcasConstants(dispatchParams); + commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray); + + // Dispatch RCAS + const int threadGroupWorkRegionDimRcas = 16; + int threadGroupsX = (UpscalerConsts.upscaleSize.x + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + int threadGroupsY = (UpscalerConsts.upscaleSize.y + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas; + _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY); + } + +#if UNITY_EDITOR || DEVELOPMENT_BUILD + if ((dispatchParams.Flags & Fsr3Upscaler.DispatchFlags.DrawDebugView) != 0) + { + _debugViewPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); + } +#endif + + _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames; + + Fsr3UpscalerResources.DestroyAliasableResources(commandBuffer); + } + + public void GenerateReactiveMask(Fsr3Upscaler.GenerateReactiveDescription dispatchParams) + { + _commandBuffer.Clear(); + GenerateReactiveMask(dispatchParams, _commandBuffer); + Graphics.ExecuteCommandBuffer(_commandBuffer); + } + + public void GenerateReactiveMask(Fsr3Upscaler.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer) + { + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + GenReactiveConsts.scale = dispatchParams.Scale; + GenReactiveConsts.threshold = dispatchParams.CutoffThreshold; + GenReactiveConsts.binaryValue = dispatchParams.BinaryValue; + GenReactiveConsts.flags = (uint)dispatchParams.Flags; + commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray); + + ((Fsr3UpscalerGenerateReactivePass)_generateReactivePass).ScheduleDispatch(commandBuffer, dispatchParams, dispatchSrcX, dispatchSrcY); + } + + private void GenerateTransparencyCompositionReactive(Fsr3Upscaler.DispatchDescription dispatchParams, CommandBuffer commandBuffer, int frameIndex) + { + const int threadGroupWorkRegionDim = 8; + int dispatchSrcX = (dispatchParams.RenderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + int dispatchSrcY = (dispatchParams.RenderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + TcrAutoGenConsts.autoTcThreshold = dispatchParams.AutoTcThreshold; + TcrAutoGenConsts.autoTcScale = dispatchParams.AutoTcScale; + TcrAutoGenConsts.autoReactiveScale = dispatchParams.AutoReactiveScale; + TcrAutoGenConsts.autoReactiveMax = dispatchParams.AutoReactiveMax; + commandBuffer.SetBufferData(_tcrAutogenerateConstantsBuffer, _tcrAutogenerateConstantsArray); + + _tcrAutogeneratePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); + } + + private void SetupConstants(Fsr3Upscaler.DispatchDescription dispatchParams, bool resetAccumulation) + { + ref Fsr3Upscaler.UpscalerConstants constants = ref UpscalerConsts; + + constants.previousFrameJitterOffset = constants.jitterOffset; + constants.jitterOffset = dispatchParams.JitterOffset; + + constants.previousFrameRenderSize = constants.renderSize; + constants.renderSize = dispatchParams.RenderSize; + constants.maxRenderSize = _contextDescription.MaxRenderSize; + + // Compute the horizontal FOV for the shader from the vertical one + float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; + float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f; + constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f); + constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? dispatchParams.ViewSpaceToMetersFactor : 1.0f; + + // Compute params to enable device depth to view space depth computation in shader + constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams); + + constants.previousFrameUpscaleSize = constants.upscaleSize; + if (dispatchParams.UpscaleSize.x == 0 && dispatchParams.UpscaleSize.y == 0) + { + constants.upscaleSize = _contextDescription.MaxUpscaleSize; + } + else + { + constants.upscaleSize = dispatchParams.UpscaleSize; + } + + // To be updated if resource is larger than the actual image size + constants.downscaleFactor = new Vector2((float)constants.renderSize.x / constants.upscaleSize.x, (float)constants.renderSize.y / constants.upscaleSize.y); + + // Calculate pre-exposure relevant factors + constants.deltaPreExposure = 1.0f; + _previousFramePreExposure = _preExposure; + _preExposure = dispatchParams.PreExposure != 0.0f ? dispatchParams.PreExposure : 1.0f; + + if (_previousFramePreExposure > 0.0f) + { + constants.deltaPreExposure = _preExposure / _previousFramePreExposure; + } + + // Motion vector data + Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.upscaleSize : constants.renderSize; + constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize; + + // Compute jitter cancellation + if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) + { + constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize; + _previousJitterOffset = constants.jitterOffset; + } + + int jitterPhaseCount = Fsr3Upscaler.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.MaxUpscaleSize.x); + if (resetAccumulation || constants.jitterPhaseCount == 0) + { + constants.jitterPhaseCount = jitterPhaseCount; + } + else + { + int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount); + if (jitterPhaseCountDelta > 0) + constants.jitterPhaseCount++; + else if (jitterPhaseCountDelta < 0) + constants.jitterPhaseCount--; + } + + // Convert delta time to seconds and clamp to [0, 1] + constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta); + + if (resetAccumulation) + constants.frameIndex = 0; + else + constants.frameIndex += 1.0f; + } + + private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Fsr3Upscaler.DispatchDescription dispatchParams) + { + bool inverted = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0; + bool infinite = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInfinite) != 0; + + // make sure it has no impact if near and far plane values are swapped in dispatch params + // the flags "inverted" and "infinite" will decide what transform to use + float min = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar); + float max = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar); + + if (inverted) + { + (min, max) = (max, min); + } + + float q = max / (min - max); + float d = -1.0f; + + Vector4 matrixElemC = new Vector4(q, -1.0f - Mathf.Epsilon, q, 0.0f + Mathf.Epsilon); + Vector4 matrixElemE = new Vector4(q * min, -min - Mathf.Epsilon, q * min, max); + + // Revert x and y coords + float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y; + float cotHalfFovY = Mathf.Cos(0.5f * dispatchParams.CameraFovAngleVertical) / Mathf.Sin(0.5f * dispatchParams.CameraFovAngleVertical); + + int matrixIndex = (inverted ? 2 : 0) + (infinite ? 1 : 0); + return new Vector4( + d * matrixElemC[matrixIndex], + matrixElemE[matrixIndex], + aspect / cotHalfFovY, + 1.0f / cotHalfFovY); + } + + private void SetupRcasConstants(Fsr3Upscaler.DispatchDescription dispatchParams) + { + int sharpnessIndex = Mathf.RoundToInt(Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1)); + RcasConsts = RcasConfigs[sharpnessIndex]; + } + + private void SetupSpdConstants(Fsr3Upscaler.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount) + { + RectInt rectInfo = new RectInt(0, 0, dispatchParams.RenderSize.x, dispatchParams.RenderSize.y); + SpdSetup(rectInfo, out dispatchThreadGroupCount, out var workGroupOffset, out var numWorkGroupsAndMips); + + // Downsample + ref Fsr3Upscaler.SpdConstants spdConstants = ref SpdConsts; + spdConstants.numWorkGroups = (uint)numWorkGroupsAndMips.x; + spdConstants.mips = (uint)numWorkGroupsAndMips.y; + spdConstants.workGroupOffsetX = (uint)workGroupOffset.x; + spdConstants.workGroupOffsetY = (uint)workGroupOffset.y; + spdConstants.renderSizeX = (uint)dispatchParams.RenderSize.x; + spdConstants.renderSizeY = (uint)dispatchParams.RenderSize.y; + } + + private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1) + { + workGroupOffset = new Vector2Int(rectInfo.x / 64, rectInfo.y / 64); + + int endIndexX = (rectInfo.x + rectInfo.width - 1) / 64; + int endIndexY = (rectInfo.y + rectInfo.height - 1) / 64; + + dispatchThreadGroupCount = new Vector2Int(endIndexX + 1 - workGroupOffset.x, endIndexY + 1 - workGroupOffset.y); + + numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mips); + if (mips < 0) + { + float resolution = Math.Max(rectInfo.width, rectInfo.height); + numWorkGroupsAndMips.y = Math.Min(Mathf.FloorToInt(Mathf.Log(resolution, 2.0f)), 12); + } + } + + private void DebugCheckDispatch(Fsr3Upscaler.DispatchDescription dispatchParams) + { + if (!dispatchParams.Color.IsValid) + { + Debug.LogError("Color resource is null"); + } + + if (!dispatchParams.Depth.IsValid) + { + Debug.LogError("Depth resource is null"); + } + + if (!dispatchParams.MotionVectors.IsValid) + { + Debug.LogError("MotionVectors resource is null"); + } + + if (dispatchParams.Exposure.IsValid && (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableAutoExposure) != 0) + { + Debug.LogWarning("Exposure resource provided, however auto exposure flag is present"); + } + + if (!dispatchParams.Output.IsValid) + { + Debug.LogError("Output resource is null"); + } + + if (Mathf.Abs(dispatchParams.JitterOffset.x) > 1.0f || Mathf.Abs(dispatchParams.JitterOffset.y) > 1.0f) + { + Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]"); + } + + if (dispatchParams.MotionVectorScale.x > _contextDescription.MaxRenderSize.x || dispatchParams.MotionVectorScale.y > _contextDescription.MaxRenderSize.y) + { + Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize"); + } + + if (dispatchParams.MotionVectorScale.x == 0.0f || dispatchParams.MotionVectorScale.y == 0.0f) + { + Debug.LogWarning("MotionVectorScale contains zero scale value"); + } + + if (dispatchParams.RenderSize.x > _contextDescription.MaxRenderSize.x || dispatchParams.RenderSize.y > _contextDescription.MaxRenderSize.y) + { + Debug.LogWarning("RenderSize is greater than context MaxRenderSize"); + } + + if (dispatchParams.RenderSize.x == 0 || dispatchParams.RenderSize.y == 0) + { + Debug.LogWarning("RenderSize contains zero dimension"); + } + + if (dispatchParams.Sharpness < 0.0f || dispatchParams.Sharpness > 1.0f) + { + Debug.LogWarning("Sharpness contains value outside of expected range [0.0, 1.0]"); + } + + if (dispatchParams.FrameTimeDelta > 1.0f) + { + Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)"); + } + + if (dispatchParams.PreExposure == 0.0f) + { + Debug.LogError("PreExposure provided as 0.0f which is invalid"); + } + + bool infiniteDepth = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInfinite) != 0; + bool inverseDepth = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0; + + if (inverseDepth) + { + if (dispatchParams.CameraNear < dispatchParams.CameraFar) + { + Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar"); + } + + if (infiniteDepth) + { + if (dispatchParams.CameraNear < float.MaxValue) + { + Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue"); + } + } + + if (dispatchParams.CameraFar < 0.075f) + { + Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting"); + } + } + else + { + if (dispatchParams.CameraNear > dispatchParams.CameraFar) + { + Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context"); + } + + if (infiniteDepth) + { + if (dispatchParams.CameraFar < float.MaxValue) + { + Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue"); + } + } + + if (dispatchParams.CameraNear < 0.075f) + { + Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting"); + } + } + + if (dispatchParams.CameraFovAngleVertical <= 0.0f) + { + Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f"); + } + + if (dispatchParams.CameraFovAngleVertical > Mathf.PI) + { + Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI"); + } + } + + /// + /// The FSR3 C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader. + /// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values. + /// + private static readonly Fsr3Upscaler.RcasConstants[] RcasConfigs = new [] + { + new Fsr3Upscaler.RcasConstants(1048576000u, 872428544u), + new Fsr3Upscaler.RcasConstants(1049178080u, 877212745u), + new Fsr3Upscaler.RcasConstants(1049823372u, 882390168u), + new Fsr3Upscaler.RcasConstants(1050514979u, 887895276u), + new Fsr3Upscaler.RcasConstants(1051256227u, 893859143u), + new Fsr3Upscaler.RcasConstants(1052050675u, 900216232u), + new Fsr3Upscaler.RcasConstants(1052902144u, 907032080u), + new Fsr3Upscaler.RcasConstants(1053814727u, 914306687u), + new Fsr3Upscaler.RcasConstants(1054792807u, 922105590u), + new Fsr3Upscaler.RcasConstants(1055841087u, 930494326u), + new Fsr3Upscaler.RcasConstants(1056964608u, 939538432u), + new Fsr3Upscaler.RcasConstants(1057566688u, 944322633u), + new Fsr3Upscaler.RcasConstants(1058211980u, 949500056u), + new Fsr3Upscaler.RcasConstants(1058903587u, 955005164u), + new Fsr3Upscaler.RcasConstants(1059644835u, 960969031u), + new Fsr3Upscaler.RcasConstants(1060439283u, 967326120u), + new Fsr3Upscaler.RcasConstants(1061290752u, 974141968u), + new Fsr3Upscaler.RcasConstants(1062203335u, 981416575u), + new Fsr3Upscaler.RcasConstants(1063181415u, 989215478u), + new Fsr3Upscaler.RcasConstants(1064229695u, 997604214u), + new Fsr3Upscaler.RcasConstants(1065353216u, 1006648320), + }; + + private static ComputeBuffer CreateConstantBuffer() where TConstants: struct + { + return new ComputeBuffer(1, Marshal.SizeOf(), ComputeBufferType.Constant); + } + + private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef) + { + if (bufferRef == null) + return; + + bufferRef.Release(); + bufferRef = null; + } + + private static void DestroyPass(ref Fsr3UpscalerPass pass) + { + if (pass == null) + return; + + pass.Dispose(); + pass = null; + } + } +} diff --git a/Runtime/FSR3/Fsr3UpscalerContext.cs.meta b/Runtime/FSR3/Fsr3UpscalerContext.cs.meta new file mode 100644 index 0000000..0b27f6d --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerContext.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 742dda30b4604d5488e7eeda3cf04d56 +timeCreated: 1673442225 \ No newline at end of file diff --git a/Runtime/FSR3/Fsr3UpscalerPass.cs b/Runtime/FSR3/Fsr3UpscalerPass.cs new file mode 100644 index 0000000..3825d1e --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerPass.cs @@ -0,0 +1,478 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using System.Runtime.InteropServices; +using UnityEngine; +using UnityEngine.Profiling; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// Base class for all of the compute passes that make up the FSR3 Upscaler process. + /// This loosely matches the FfxPipelineState struct from the original FSR3 codebase, wrapped in an object-oriented blanket. + /// These classes are responsible for loading compute shaders, managing temporary resources, binding resources to shader kernels and dispatching said shaders. + /// + internal abstract class Fsr3UpscalerPass: IDisposable + { + protected readonly Fsr3Upscaler.ContextDescription ContextDescription; + protected readonly Fsr3UpscalerResources Resources; + protected readonly ComputeBuffer Constants; + + protected ComputeShader ComputeShader; + protected int KernelIndex; + + protected CustomSampler Sampler; + + protected Fsr3UpscalerPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + { + ContextDescription = contextDescription; + Resources = resources; + Constants = constants; + } + + public virtual void Dispose() + { + } + + public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + commandBuffer.BeginSample(Sampler); + DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); + commandBuffer.EndSample(Sampler); + } + + protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); + + protected void InitComputeShader(string passName, ComputeShader shader) + { + InitComputeShader(passName, shader, ContextDescription.Flags); + } + + private void InitComputeShader(string passName, ComputeShader shader, Fsr3Upscaler.InitializationFlags flags) + { + if (shader == null) + { + throw new MissingReferenceException($"Shader for FSR3 Upscaler pass '{passName}' could not be loaded! Please ensure it is included in the project correctly."); + } + + ComputeShader = shader; + KernelIndex = ComputeShader.FindKernel("CS"); + Sampler = CustomSampler.Create(passName); + + bool useLut = false; +#if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ + if (SystemInfo.computeSubGroupSize == 64) + { + useLut = true; + } +#endif + + // This matches the permutation rules from the CreatePipeline* functions + if ((flags & Fsr3Upscaler.InitializationFlags.EnableHighDynamicRange) != 0) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT"); + if ((flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS"); + if ((flags & Fsr3Upscaler.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS"); + if ((flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH"); + if (useLut) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE"); + if ((flags & Fsr3Upscaler.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFX_HALF"); + + // Inform the shader which render pipeline we're currently using + var pipeline = GraphicsSettings.currentRenderPipeline; + if (pipeline != null && pipeline.GetType().Name.Contains("HDRenderPipeline")) + { + ComputeShader.EnableKeyword("UNITY_FSR_HDRP"); + } + } + } + + internal class Fsr3UpscalerPrepareInputsPass : Fsr3UpscalerPass + { + public Fsr3UpscalerPrepareInputsPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Prepare Inputs", contextDescription.Shaders.prepareInputsPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + ref var depth = ref dispatchParams.Depth; + ref var motionVectors = ref dispatchParams.MotionVectors; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputDepth, depth.RenderTarget, depth.MipLevel, depth.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavDilatedMotionVectors, Resources.DilatedVelocity); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavDilatedDepth, Resources.DilatedDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavReconstructedPrevNearestDepth, Resources.ReconstructedPrevNearestDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavCurrentLuma, Resources.Luma[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerLumaPyramidPass : Fsr3UpscalerPass + { + private readonly ComputeBuffer _spdConstants; + + public Fsr3UpscalerLumaPyramidPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer spdConstants) + : base(contextDescription, resources, constants) + { + _spdConstants = spdConstants; + + InitComputeShader("Compute Luminance Pyramid", contextDescription.Shaders.lumaPyramidPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvCurrentLuma, Resources.Luma[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvFarthestDepth, Fsr3ShaderIDs.UavFarthestDepth); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavFrameInfo, Resources.FrameInfo); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip0, Resources.SpdMips, 0); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip1, Resources.SpdMips, 1); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip2, Resources.SpdMips, 2); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip3, Resources.SpdMips, 3); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip4, Resources.SpdMips, 4); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip5, Resources.SpdMips, 5); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbSpd, _spdConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerShadingChangePyramidPass : Fsr3UpscalerPass + { + private readonly ComputeBuffer _spdConstants; + + public Fsr3UpscalerShadingChangePyramidPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer spdConstants) + : base(contextDescription, resources, constants) + { + _spdConstants = spdConstants; + + InitComputeShader("Compute Shading Change Pyramid", contextDescription.Shaders.shadingChangePyramidPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvCurrentLuma, Resources.Luma[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvPreviousLuma, Resources.Luma[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedVelocity); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + + // TODO: possibly the atomic counter might have to get cleared in-between passes. Check on MacOS whether we have issues with this or not. + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip0, Resources.SpdMips, 0); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip1, Resources.SpdMips, 1); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip2, Resources.SpdMips, 2); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip3, Resources.SpdMips, 3); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip4, Resources.SpdMips, 4); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavSpdMip5, Resources.SpdMips, 5); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbSpd, _spdConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerShadingChangePass : Fsr3UpscalerPass + { + public Fsr3UpscalerShadingChangePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Compute Shading Change", contextDescription.Shaders.shadingChangePass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvSpdMips, Resources.SpdMips); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerPrepareReactivityPass : Fsr3UpscalerPass + { + public Fsr3UpscalerPrepareReactivityPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Prepare Reactivity", contextDescription.Shaders.prepareReactivityPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + ref var reactive = ref dispatchParams.Reactive; + ref var tac = ref dispatchParams.TransparencyAndComposition; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvReconstructedPrevNearestDepth, Resources.ReconstructedPrevNearestDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedVelocity); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedDepth, Resources.DilatedDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, tac.MipLevel, tac.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvAccumulation, Resources.Accumulation[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvShadingChange, Fsr3ShaderIDs.UavShadingChange); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvCurrentLuma, Resources.Luma[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAccumulation, Resources.Accumulation[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerLumaInstabilityPass : Fsr3UpscalerPass + { + public Fsr3UpscalerLumaInstabilityPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Compute Luminance Instability", contextDescription.Shaders.lumaInstabilityPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedReactiveMasks, Fsr3ShaderIDs.UavDilatedReactiveMasks); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedVelocity); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvFrameInfo, Resources.FrameInfo); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLumaHistory, Resources.LumaHistory[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvFarthestDepthMip1, Fsr3ShaderIDs.UavFarthestDepthMip1); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvCurrentLuma, Resources.Luma[frameIndex]); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerAccumulatePass : Fsr3UpscalerPass + { + private const string SharpeningKeyword = "FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING"; + +#if UNITY_2021_2_OR_NEWER + private readonly LocalKeyword _sharpeningKeyword; +#endif + + public Fsr3UpscalerAccumulatePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Accumulate", contextDescription.Shaders.accumulatePass); +#if UNITY_2021_2_OR_NEWER + _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword); +#endif + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { +#if UNITY_2021_2_OR_NEWER + if (dispatchParams.EnableSharpening) + commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword); + else + commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword); +#else + if (dispatchParams.EnableSharpening) + commandBuffer.EnableShaderKeyword(SharpeningKeyword); + else + commandBuffer.DisableShaderKeyword(SharpeningKeyword); +#endif + + ref var color = ref dispatchParams.Color; + ref var exposure = ref dispatchParams.Exposure; + ref var output = ref dispatchParams.Output; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedReactiveMasks, Fsr3ShaderIDs.UavDilatedReactiveMasks); + + if ((ContextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) + { + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedVelocity); + } + else + { + ref var motionVectors = ref dispatchParams.MotionVectors; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + } + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLanczosLut, Resources.LanczosLut); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvFarthestDepthMip1, Fsr3ShaderIDs.UavFarthestDepthMip1); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvCurrentLuma, Resources.Luma[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvLumaInstability, Fsr3ShaderIDs.UavLumaInstability); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerSharpenPass : Fsr3UpscalerPass + { + private readonly ComputeBuffer _rcasConstants; + + public Fsr3UpscalerSharpenPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants) + : base(contextDescription, resources, constants) + { + _rcasConstants = rcasConstants; + + InitComputeShader("RCAS Sharpening", contextDescription.Shaders.sharpenPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]); + + ref var output = ref dispatchParams.Output; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbRcas, _rcasConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + + internal class Fsr3UpscalerGenerateReactivePass : Fsr3UpscalerPass + { + private readonly ComputeBuffer _generateReactiveConstants; + + public Fsr3UpscalerGenerateReactivePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer generateReactiveConstants) + : base(contextDescription, resources, null) + { + _generateReactiveConstants = generateReactiveConstants; + + InitComputeShader("Auto-Generate Reactive Mask", contextDescription.Shaders.autoGenReactivePass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + } + + public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) + { + commandBuffer.BeginSample(Sampler); + + ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; + ref var color = ref dispatchParams.ColorPreUpscale; + ref var reactive = ref dispatchParams.OutReactive; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAutoReactive, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbGenReactive, _generateReactiveConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + + commandBuffer.EndSample(Sampler); + } + } + + internal class Fsr3UpscalerTcrAutogeneratePass : Fsr3UpscalerPass + { + private readonly ComputeBuffer _tcrAutogenerateConstants; + + public Fsr3UpscalerTcrAutogeneratePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer tcrAutogenerateConstants) + : base(contextDescription, resources, constants) + { + _tcrAutogenerateConstants = tcrAutogenerateConstants; + + InitComputeShader("Auto-Generate Transparency & Composition Mask", contextDescription.Shaders.tcrAutoGenPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var color = ref dispatchParams.Color; + ref var motionVectors = ref dispatchParams.MotionVectors; + ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; + ref var reactive = ref dispatchParams.Reactive; + ref var tac = ref dispatchParams.TransparencyAndComposition; + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvOpaqueOnly, opaqueOnly.RenderTarget, opaqueOnly.MipLevel, opaqueOnly.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputColor, color.RenderTarget, color.MipLevel, color.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputMotionVectors, motionVectors.RenderTarget, motionVectors.MipLevel, motionVectors.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvPrevColorPreAlpha, Resources.PrevPreAlpha[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvPrevColorPostAlpha, Resources.PrevPostAlpha[frameIndex ^ 1]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvReactiveMask, reactive.RenderTarget, reactive.MipLevel, reactive.SubElement); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvTransparencyAndCompositionMask, tac.RenderTarget, tac.MipLevel, tac.SubElement); + + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAutoReactive, Resources.AutoReactive); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavAutoComposition, Resources.AutoComposition); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavPrevColorPreAlpha, Resources.PrevPreAlpha[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavPrevColorPostAlpha, Resources.PrevPostAlpha[frameIndex]); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbGenReactive, _tcrAutogenerateConstants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } + +#if UNITY_EDITOR || DEVELOPMENT_BUILD + internal class Fsr3UpscalerDebugViewPass : Fsr3UpscalerPass + { + public Fsr3UpscalerDebugViewPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) + : base(contextDescription, resources, constants) + { + InitComputeShader("Debug View", contextDescription.Shaders.debugViewPass); + } + + protected override void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) + { + ref var exposure = ref dispatchParams.Exposure; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedReactiveMasks, Fsr3ShaderIDs.UavDilatedReactiveMasks); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedVelocity); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvDilatedDepth, Resources.DilatedDepth); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[frameIndex]); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.SrvInputExposure, exposure.RenderTarget, exposure.MipLevel, exposure.SubElement); + + ref var output = ref dispatchParams.Output; + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr3ShaderIDs.UavUpscaledOutput, output.RenderTarget, output.MipLevel, output.SubElement); + + commandBuffer.SetComputeConstantBufferParam(ComputeShader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, Marshal.SizeOf()); + + commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); + } + } +#endif +} diff --git a/Runtime/FSR3/Fsr3UpscalerPass.cs.meta b/Runtime/FSR3/Fsr3UpscalerPass.cs.meta new file mode 100644 index 0000000..424e36c --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerPass.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 9e0cf27d80d1d4d4c81450791a411ead +timeCreated: 1676885169 \ No newline at end of file diff --git a/Runtime/FSR3/Fsr3UpscalerResources.cs b/Runtime/FSR3/Fsr3UpscalerResources.cs new file mode 100644 index 0000000..dc3c149 --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerResources.cs @@ -0,0 +1,248 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +using System; +using UnityEngine; +using UnityEngine.Experimental.Rendering; +using UnityEngine.Rendering; + +namespace FidelityFX +{ + /// + /// Helper class for bundling and managing persistent resources required by the FSR3 Upscaler process. + /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames. + /// + internal class Fsr3UpscalerResources + { + public Texture2D LanczosLut; + public Texture2D DefaultExposure; + public Texture2D DefaultReactive; + + public RenderTexture SpdAtomicCounter; + public RenderTexture SpdMips; + public RenderTexture DilatedVelocity; + public RenderTexture DilatedDepth; + public RenderTexture ReconstructedPrevNearestDepth; + public RenderTexture FrameInfo; + public RenderTexture AutoReactive; + public RenderTexture AutoComposition; + + public readonly RenderTexture[] Accumulation = new RenderTexture[2]; + public readonly RenderTexture[] Luma = new RenderTexture[2]; + public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2]; + public readonly RenderTexture[] LumaHistory = new RenderTexture[2]; + public readonly RenderTexture[] PrevPreAlpha = new RenderTexture[2]; + public readonly RenderTexture[] PrevPostAlpha = new RenderTexture[2]; + + public void Create(Fsr3Upscaler.ContextDescription contextDescription) + { + // Generate the data for the LUT + const int lanczos2LutWidth = 128; + float[] lanczos2Weights = new float[lanczos2LutWidth]; + for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex) + { + float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1); + float y = Fsr3Upscaler.Lanczos2(x); + lanczos2Weights[currentLanczosWidthIndex] = y; + } + + Vector2Int maxRenderSize = contextDescription.MaxRenderSize; + Vector2Int maxRenderSizeDiv2 = maxRenderSize / 2; + + // Resource FSR3UPSCALER_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE + // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data. + LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_LanczosLutData" }; + LanczosLut.SetPixelData(lanczos2Weights, 0); + LanczosLut.Apply(); + + // Resource FSR3UPSCALER_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultReactivityMask" }; + DefaultReactive.SetPixel(0, 0, Color.clear); + DefaultReactive.Apply(); + + // Resource FSR3UPSCALER_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE + DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultExposure" }; + DefaultExposure.SetPixel(0, 0, Color.clear); + DefaultExposure.Apply(); + + // Resource FSR3UPSCALER_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + // Despite what the original FSR3 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal. + SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "FSR3UPSCALER_SpdAtomicCounter", enableRandomWrite = true }; + SpdAtomicCounter.Create(); + + // Resource FSR3UPSCALER_SpdMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this. + int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(maxRenderSizeDiv2.x, maxRenderSizeDiv2.y), 2.0f)); + SpdMips = new RenderTexture(maxRenderSizeDiv2.x, maxRenderSizeDiv2.y, 0, GraphicsFormat.R16G16_SFloat, mipCount) { name = "FSR3UPSCALER_SpdMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false }; + SpdMips.Create(); + + // Resource FSR3UPSCALER_DilatedVelocity: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE + DilatedVelocity = new RenderTexture(maxRenderSize.x, maxRenderSize.y, 0, GraphicsFormat.R16G16_SFloat) { name = "FSR3UPSCALER_DilatedVelocity", enableRandomWrite = true }; + DilatedVelocity.Create(); + + // Resource FSR3UPSCALER_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_NONE + DilatedDepth = new RenderTexture(maxRenderSize.x, maxRenderSize.y, 0, GraphicsFormat.R32_SFloat) { name = "FSR3UPSCALER_DilatedDepth", enableRandomWrite = true }; + DilatedDepth.Create(); + + // Resource FSR3UPSCALER_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_NONE + ReconstructedPrevNearestDepth = new RenderTexture(maxRenderSize.x, maxRenderSize.y, 0, GraphicsFormat.R32_UInt) { name = "FSR3UPSCALER_ReconstructedPrevNearestDepth", enableRandomWrite = true }; + ReconstructedPrevNearestDepth.Create(); + + // Resource FSR3UPSCALER_FrameInfo: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, FFX_RESOURCE_FLAGS_NONE + FrameInfo = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32B32A32_SFloat) { name = "FSR3UPSCALER_FrameInfo", enableRandomWrite = true }; + FrameInfo.Create(); + + // Resources FSR3UPSCALER_Accumulation1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(Accumulation, "FSR3UPSCALER_Accumulation", maxRenderSize, GraphicsFormat.R8_UNorm); + + // Resources FSR3UPSCALER_Luma1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(Luma, "FSR3UPSCALER_Luma", maxRenderSize, GraphicsFormat.R16_SFloat); + + // Resources FSR3UPSCALER_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(InternalUpscaled, "FSR3UPSCALER_InternalUpscaled", contextDescription.MaxUpscaleSize, GraphicsFormat.R16G16B16A16_SFloat); + + // Resources FSR3UPSCALER_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(LumaHistory, "FSR3UPSCALER_LumaHistory", maxRenderSize, GraphicsFormat.R16G16B16A16_SFloat); + } + + public void CreateTcrAutogenResources(Fsr3Upscaler.ContextDescription contextDescription) + { + // Resource FSR3UPSCALER_AutoReactive: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + AutoReactive = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoReactive", enableRandomWrite = true }; + AutoReactive.Create(); + + // Resource FSR3UPSCALER_AutoComposition: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE + AutoComposition = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoComposition", enableRandomWrite = true }; + AutoComposition.Create(); + + // Resources FSR3UPSCALER_PrevPreAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(PrevPreAlpha, "FSR3UPSCALER_PrevPreAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); + + // Resources FSR3UPSCALER_PrevPostAlpha0/1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE + CreateDoubleBufferedResource(PrevPostAlpha, "FSR3UPSCALER_PrevPostAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32); + } + + // Set up shared aliasable resources, i.e. temporary render textures + // These do not need to persist between frames, but they do need to be available between passes + public static void CreateAliasableResources(CommandBuffer commandBuffer, Fsr3Upscaler.ContextDescription contextDescription, Fsr3Upscaler.DispatchDescription dispatchParams) + { + Vector2Int maxUpscaleSize = contextDescription.MaxUpscaleSize; + Vector2Int maxRenderSize = contextDescription.MaxRenderSize; + Vector2Int maxRenderSizeDiv2 = maxRenderSize / 2; + + // FSR3UPSCALER_IntermediateFp16x1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + // TODO: see if these can be combined into one (this is a reusable temporary intermediate buffer used by different passes for different purposes) + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavFarthestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavLumaInstability, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + + // FSR3UPSCALER_ShadingChange: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavShadingChange, maxRenderSizeDiv2.x, maxRenderSizeDiv2.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + + // FSR3UPSCALER_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavNewLocks, maxUpscaleSize.x, maxUpscaleSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + + // FSR3UPSCALER_FarthestDepthMip1: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavFarthestDepthMip1, maxRenderSizeDiv2.x, maxRenderSizeDiv2.y, 0, default, GraphicsFormat.R16_SFloat, 1, true); + + // FSR3UPSCALER_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE + commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8B8A8_UNorm, 1, true); + } + + public static void DestroyAliasableResources(CommandBuffer commandBuffer) + { + // Release all of the aliasable resources used this frame + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks); + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavFarthestDepthMip1); + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavNewLocks); + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavShadingChange); + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavLumaInstability); + commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavFarthestDepth); + } + + private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format) + { + for (int i = 0; i < 2; ++i) + { + resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true }; + resource[i].Create(); + } + } + + public void Destroy() + { + DestroyTcrAutogenResources(); + + DestroyResource(LumaHistory); + DestroyResource(InternalUpscaled); + DestroyResource(Luma); + DestroyResource(Accumulation); + + DestroyResource(ref FrameInfo); + DestroyResource(ref ReconstructedPrevNearestDepth); + DestroyResource(ref DilatedDepth); + DestroyResource(ref DilatedVelocity); + DestroyResource(ref SpdMips); + DestroyResource(ref SpdAtomicCounter); + + DestroyResource(ref DefaultReactive); + DestroyResource(ref DefaultExposure); + DestroyResource(ref LanczosLut); + } + + public void DestroyTcrAutogenResources() + { + DestroyResource(PrevPostAlpha); + DestroyResource(PrevPreAlpha); + DestroyResource(ref AutoComposition); + DestroyResource(ref AutoReactive); + } + + private static void DestroyResource(ref Texture2D resource) + { + if (resource == null) + return; + +#if UNITY_EDITOR + if (Application.isPlaying && !UnityEditor.EditorApplication.isPaused) + UnityEngine.Object.Destroy(resource); + else + UnityEngine.Object.DestroyImmediate(resource); +#else + UnityEngine.Object.Destroy(resource); +#endif + resource = null; + } + + private static void DestroyResource(ref RenderTexture resource) + { + if (resource == null) + return; + + resource.Release(); + resource = null; + } + + private static void DestroyResource(RenderTexture[] resource) + { + for (int i = 0; i < resource.Length; ++i) + DestroyResource(ref resource[i]); + } + } +} diff --git a/Runtime/FSR3/Fsr3UpscalerResources.cs.meta b/Runtime/FSR3/Fsr3UpscalerResources.cs.meta new file mode 100644 index 0000000..62d6dc1 --- /dev/null +++ b/Runtime/FSR3/Fsr3UpscalerResources.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: 6749e0e55c9ac5a4f869bc73dfbb7163 +timeCreated: 1677236102 \ No newline at end of file diff --git a/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef b/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef new file mode 100644 index 0000000..678ea1a --- /dev/null +++ b/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef @@ -0,0 +1,16 @@ +{ + "name": "fidelityfx.fsr.fsr3upscaler", + "rootNamespace": "FidelityFX", + "references": [ + "GUID:f355c2e9d97d4d8438e24fa268b0f6a5" + ], + "includePlatforms": [], + "excludePlatforms": [], + "allowUnsafeCode": false, + "overrideReferences": false, + "precompiledReferences": [], + "autoReferenced": true, + "defineConstraints": [], + "versionDefines": [], + "noEngineReferences": false +} \ No newline at end of file diff --git a/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef.meta b/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef.meta new file mode 100644 index 0000000..a720616 --- /dev/null +++ b/Runtime/FSR3/fidelityfx.fsr.fsr3upscaler.asmdef.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: c64cdcb81f02fbf4f9dc91468e49c2b7 +AssemblyDefinitionImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders.meta b/Shaders.meta new file mode 100644 index 0000000..cf5a649 --- /dev/null +++ b/Shaders.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: af3a6660394230a4bb247808bbefe9f4 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_accumulate_pass.compute b/Shaders/ffx_fsr2_accumulate_pass.compute new file mode 100644 index 0000000..c022799 --- /dev/null +++ b/Shaders/ffx_fsr2_accumulate_pass.compute @@ -0,0 +1,41 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR2_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR2_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +// Ensure the correct value is defined for this keyword, as it is used to select one of multiple sampler functions +#ifdef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#undef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 +#endif + +#include "shaders/ffx_fsr2_accumulate_pass.hlsl" diff --git a/Shaders/ffx_fsr2_accumulate_pass.compute.meta b/Shaders/ffx_fsr2_accumulate_pass.compute.meta new file mode 100644 index 0000000..5936e95 --- /dev/null +++ b/Shaders/ffx_fsr2_accumulate_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 7e791d69a5be98247a93b63897bc64df +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_autogen_reactive_pass.compute b/Shaders/ffx_fsr2_autogen_reactive_pass.compute new file mode 100644 index 0000000..0f60ec6 --- /dev/null +++ b/Shaders/ffx_fsr2_autogen_reactive_pass.compute @@ -0,0 +1,32 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_autogen_reactive_pass.hlsl" diff --git a/Shaders/ffx_fsr2_autogen_reactive_pass.compute.meta b/Shaders/ffx_fsr2_autogen_reactive_pass.compute.meta new file mode 100644 index 0000000..69662d6 --- /dev/null +++ b/Shaders/ffx_fsr2_autogen_reactive_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 67ee1b32ca5e4234db9f06984c783dee +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute b/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute new file mode 100644 index 0000000..ecd2120 --- /dev/null +++ b/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute @@ -0,0 +1,42 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +// Wave operations require shader model 6.0; this can only be enabled when using DXC on D3D12 +// These pragmas are commented out by default as Unity will sometimes ignore the #if's and try to enable these features anyway. +// Uncomment the below lines if you intend to try wave operations on DX12 with the DXC compiler. +//#if defined(UNITY_COMPILER_DXC) && defined(SHADER_API_D3D12) +//#pragma require WaveBasic // Required for WaveGetLaneIndex +//#pragma require WaveBallot // Required for WaveReadLaneAt +//#else +#define FFX_SPD_NO_WAVE_OPERATIONS +//#endif + +#include "shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl" diff --git a/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute.meta b/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute.meta new file mode 100644 index 0000000..91fe4a2 --- /dev/null +++ b/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 04c3480675e29a340808141e68d4cc8b +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_depth_clip_pass.compute b/Shaders/ffx_fsr2_depth_clip_pass.compute new file mode 100644 index 0000000..e9a5ec3 --- /dev/null +++ b/Shaders/ffx_fsr2_depth_clip_pass.compute @@ -0,0 +1,32 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_depth_clip_pass.hlsl" diff --git a/Shaders/ffx_fsr2_depth_clip_pass.compute.meta b/Shaders/ffx_fsr2_depth_clip_pass.compute.meta new file mode 100644 index 0000000..72d9877 --- /dev/null +++ b/Shaders/ffx_fsr2_depth_clip_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: b207de122e2c4b844b89dcd7c5c77c80 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_lock_pass.compute b/Shaders/ffx_fsr2_lock_pass.compute new file mode 100644 index 0000000..aed4051 --- /dev/null +++ b/Shaders/ffx_fsr2_lock_pass.compute @@ -0,0 +1,30 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_lock_pass.hlsl" diff --git a/Shaders/ffx_fsr2_lock_pass.compute.meta b/Shaders/ffx_fsr2_lock_pass.compute.meta new file mode 100644 index 0000000..3503b36 --- /dev/null +++ b/Shaders/ffx_fsr2_lock_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 20b7864a7e7258946aaf0f1996febad3 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_rcas_pass.compute b/Shaders/ffx_fsr2_rcas_pass.compute new file mode 100644 index 0000000..fb61bb9 --- /dev/null +++ b/Shaders/ffx_fsr2_rcas_pass.compute @@ -0,0 +1,31 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_rcas_pass.hlsl" diff --git a/Shaders/ffx_fsr2_rcas_pass.compute.meta b/Shaders/ffx_fsr2_rcas_pass.compute.meta new file mode 100644 index 0000000..ecbdb42 --- /dev/null +++ b/Shaders/ffx_fsr2_rcas_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 40815651f0f5d994cb73da9816a7ff9b +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute b/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute new file mode 100644 index 0000000..dad772e --- /dev/null +++ b/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute @@ -0,0 +1,33 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl" diff --git a/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute.meta b/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute.meta new file mode 100644 index 0000000..92da2a6 --- /dev/null +++ b/Shaders/ffx_fsr2_reconstruct_previous_depth_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 5060dfafe45aa67459629186ceb7464e +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr2_tcr_autogen_pass.compute b/Shaders/ffx_fsr2_tcr_autogen_pass.compute new file mode 100644 index 0000000..f2bd224 --- /dev/null +++ b/Shaders/ffx_fsr2_tcr_autogen_pass.compute @@ -0,0 +1,32 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr2_tcr_autogen_pass.hlsl" diff --git a/Shaders/ffx_fsr2_tcr_autogen_pass.compute.meta b/Shaders/ffx_fsr2_tcr_autogen_pass.compute.meta new file mode 100644 index 0000000..fb8a28a --- /dev/null +++ b/Shaders/ffx_fsr2_tcr_autogen_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: f8b1c27fb6a544b43b38903592240500 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_accumulate_pass.compute b/Shaders/ffx_fsr3upscaler_accumulate_pass.compute new file mode 100644 index 0000000..2a840e3 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_accumulate_pass.compute @@ -0,0 +1,41 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +// Ensure the correct value is defined for this keyword, as it is used to select one of multiple sampler functions +#ifdef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#undef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 1 +#endif + +#include "shaders/ffx_fsr3upscaler_accumulate_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_accumulate_pass.compute.meta b/Shaders/ffx_fsr3upscaler_accumulate_pass.compute.meta new file mode 100644 index 0000000..dbe5282 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_accumulate_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: c9b45f0ae7673694ba57a4aadfe212e9 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute b/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute new file mode 100644 index 0000000..2206ded --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute @@ -0,0 +1,32 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta b/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta new file mode 100644 index 0000000..1df041b --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 5716b91fdaa4e9e439df6b96a796fe6e +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_debug_view_pass.compute b/Shaders/ffx_fsr3upscaler_debug_view_pass.compute new file mode 100644 index 0000000..4ac0244 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_debug_view_pass.compute @@ -0,0 +1,35 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_debug_view_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_debug_view_pass.compute.meta b/Shaders/ffx_fsr3upscaler_debug_view_pass.compute.meta new file mode 100644 index 0000000..f90cac3 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_debug_view_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: cb24a71d54164c54eb5e86839acd48c5 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute b/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute new file mode 100644 index 0000000..830b66d --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute @@ -0,0 +1,35 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute.meta b/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute.meta new file mode 100644 index 0000000..c01e009 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_luma_instability_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: a135306e6d1857e43a86ef20db2a47fe +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute b/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute new file mode 100644 index 0000000..f938ee4 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute @@ -0,0 +1,45 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +// Wave operations require shader model 6.0; this can only be enabled when using DXC on D3D12 +// These pragmas are commented out by default as Unity will sometimes ignore the #if's and try to enable these features anyway. +// Uncomment the below lines if you intend to try wave operations on DX12 with the DXC compiler. +//#if defined(UNITY_COMPILER_DXC) && defined(SHADER_API_D3D12) +//#pragma require WaveBasic // Required for WaveGetLaneIndex +//#pragma require WaveBallot // Required for WaveReadLaneAt +//#else +#define FFX_SPD_NO_WAVE_OPERATIONS +//#endif + +#include "shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute.meta b/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute.meta new file mode 100644 index 0000000..9e002c0 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_luma_pyramid_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: d253be05abcdc80428503d3e4cce3a36 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute b/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute new file mode 100644 index 0000000..425f2c4 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute @@ -0,0 +1,36 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +// TODO: figure out which of these defines are actually used by this shader (for all shader passes) +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute.meta b/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute.meta new file mode 100644 index 0000000..1053c34 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_prepare_inputs_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 4f59e5b9179d74844ae06a30ae1e0629 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute b/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute new file mode 100644 index 0000000..e0e4be6 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute @@ -0,0 +1,35 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute.meta b/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute.meta new file mode 100644 index 0000000..d695f48 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_prepare_reactivity_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 20e44016ed34b0d4b8de499d1b566c69 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_rcas_pass.compute b/Shaders/ffx_fsr3upscaler_rcas_pass.compute new file mode 100644 index 0000000..0e65dd2 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_rcas_pass.compute @@ -0,0 +1,31 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_rcas_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_rcas_pass.compute.meta b/Shaders/ffx_fsr3upscaler_rcas_pass.compute.meta new file mode 100644 index 0000000..cd12641 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_rcas_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 7aaf5cfff022de2499e9b0412f947f6c +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_shading_change_pass.compute b/Shaders/ffx_fsr3upscaler_shading_change_pass.compute new file mode 100644 index 0000000..296f646 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_shading_change_pass.compute @@ -0,0 +1,35 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_shading_change_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_shading_change_pass.compute.meta b/Shaders/ffx_fsr3upscaler_shading_change_pass.compute.meta new file mode 100644 index 0000000..c9ae3eb --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_shading_change_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 9a2bff2f97619ed4989d9b0577ba0641 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute b/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute new file mode 100644 index 0000000..404b3de --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute @@ -0,0 +1,38 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +// Wave operations require shader model 6.0; this can only be enabled when using DXC on D3D12 +#define FFX_SPD_NO_WAVE_OPERATIONS + +#include "shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute.meta b/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute.meta new file mode 100644 index 0000000..889be6d --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 251e663738905fa4d8817001682d802f +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute b/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute new file mode 100644 index 0000000..58c1399 --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute @@ -0,0 +1,32 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma kernel CS + +#pragma multi_compile_local __ FFX_HALF +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + +#pragma multi_compile_local __ UNITY_FSR_HDRP + +#include "ffx_fsr_unity_common.cginc" + +#include "shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl" diff --git a/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta b/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta new file mode 100644 index 0000000..ad42fbb --- /dev/null +++ b/Shaders/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 75cdc6ef23f08ed498d4da511923fcea +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/ffx_fsr_unity_common.cginc b/Shaders/ffx_fsr_unity_common.cginc new file mode 100644 index 0000000..8b77b56 --- /dev/null +++ b/Shaders/ffx_fsr_unity_common.cginc @@ -0,0 +1,85 @@ +// Copyright (c) 2024 Nico de Poel +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// Suppress a few warnings produced by FFX's HLSL code +#pragma warning(disable: 3078) // Loop control variable conflicts +#pragma warning(disable: 3203) // Signed/unsigned mismatch +#pragma warning(disable: 3556) // Integer divides might be much slower, try using uints if possible + +#define FFX_GPU // Compiling for GPU +#define FFX_HLSL // Compile for plain HLSL + +// Use the DXC shader compiler on modern graphics APIs to enable a few advanced features +// The DXC-related pragmas are disabled by default, as DXC doesn't support all platforms yet and will break on some platforms when enabled. +// Consider this to be an experimental feature. If you want to benefit from 16-bit floating point and wave operations, and don't care about supporting older graphics APIs, then it's worth a try. +//#if defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL) +//#pragma use_dxc // Using DXC will currently break DX11 support since DX11 and DX12 share the same shader bytecode in Unity. +//#endif + +// Enable half precision data types on platforms that support it +//#if defined(UNITY_COMPILER_DXC) && defined(FFX_HALF) +//#pragma require Native16Bit +//#endif + +// Hack to work around the lack of texture atomics on Metal +#if defined(SHADER_API_METAL) +#define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); } +#define InterlockedMin(dest, val) { (dest) = min((dest), (val)); } +#define InterlockedMax(dest, val) { (dest) = max((dest), (val)); } +#endif + +// Workaround for HDRP using texture arrays for its camera buffers on some platforms +// The below defines are copied from: Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/TextureXR.hlsl +#if defined(UNITY_FSR_HDRP) + // Must be in sync with C# with property useTexArray in TextureXR.cs + #if ((defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12)) && !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_GAMECORE)) || defined(SHADER_API_PSSL) || defined(SHADER_API_VULKAN) + #define UNITY_TEXTURE2D_X_ARRAY_SUPPORTED + #endif + + // Control if TEXTURE2D_X macros will expand to texture arrays + #if defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) && !defined(DISABLE_TEXTURE2D_X_ARRAY) + #define USE_TEXTURE2D_X_AS_ARRAY + #endif + + // Early defines for single-pass instancing + #if defined(STEREO_INSTANCING_ON) && defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) + #define UNITY_STEREO_INSTANCING_ENABLED + #endif + + // Helper macros to handle XR single-pass with Texture2DArray + #if defined(USE_TEXTURE2D_X_AS_ARRAY) + + // Only single-pass stereo instancing used array indexing + #if defined(UNITY_STEREO_INSTANCING_ENABLED) + static uint unity_StereoEyeIndex; + #define SLICE_ARRAY_INDEX unity_StereoEyeIndex + #else + #define SLICE_ARRAY_INDEX 0 + #endif + + // Declare and sample camera buffers as texture arrays + #define UNITY_FSR_TEX2D(type) Texture2DArray + #define UNITY_FSR_RWTEX2D(type) RWTexture2DArray + #define UNITY_FSR_POS(pxPos) FfxUInt32x3(pxPos, SLICE_ARRAY_INDEX) + #define UNITY_FSR_UV(uv) FfxFloat32x3(uv, SLICE_ARRAY_INDEX) + #define UNITY_FSR_GETDIMS(tex, w, h) { FfxUInt32 uElements; (tex).GetDimensions((w), (h), uElements); } + + #endif +#endif diff --git a/Shaders/ffx_fsr_unity_common.cginc.meta b/Shaders/ffx_fsr_unity_common.cginc.meta new file mode 100644 index 0000000..5a68b6c --- /dev/null +++ b/Shaders/ffx_fsr_unity_common.cginc.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 3ce00ba677bb7e14bb91772fd68bfe6b +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders.meta b/Shaders/shaders.meta new file mode 100644 index 0000000..8e1a170 --- /dev/null +++ b/Shaders/shaders.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: c3f8af1cab72f0e46acba11c5820d923 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_common_types.h b/Shaders/shaders/ffx_common_types.h new file mode 100644 index 0000000..09d4502 --- /dev/null +++ b/Shaders/shaders/ffx_common_types.h @@ -0,0 +1,654 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_COMMON_TYPES_H +#define FFX_COMMON_TYPES_H + +#if defined(FFX_CPU) +#define FFX_PARAMETER_IN +#define FFX_PARAMETER_OUT +#define FFX_PARAMETER_INOUT +#define FFX_PARAMETER_UNIFORM +#elif defined(FFX_HLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM uniform +#elif defined(FFX_GLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! +#endif // #if defined(FFX_CPU) + +#if defined(FFX_CPU) +/// A typedef for a boolean value. +/// +/// @ingroup CPUTypes +typedef bool FfxBoolean; + +/// A typedef for a unsigned 8bit integer. +/// +/// @ingroup CPUTypes +typedef uint8_t FfxUInt8; + +/// A typedef for a unsigned 16bit integer. +/// +/// @ingroup CPUTypes +typedef uint16_t FfxUInt16; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32; + +/// A typedef for a unsigned 64bit integer. +/// +/// @ingroup CPUTypes +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPUTypes +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPUTypes +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. +/// +/// @ingroup CPUTypes +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4[4]; + +/// A typedef for a 2x2 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2x2[4]; + +/// A typedef for a 3x3 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x3[9]; + +/// A typedef for a 3x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x4[12]; + +/// A typedef for a 4x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4x4[16]; + +/// A typedef for a 2-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x4[4]; + +/// A typedef for a 2-dimensional 32bit usigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x4[4]; +#endif // #if defined(FFX_CPU) + +#if defined(FFX_HLSL) + +#define FfxFloat32Mat4 matrix +#define FfxFloat32Mat3 matrix + +/// A typedef for a boolean value. +/// +/// @ingroup HLSLTypes +typedef bool FfxBoolean; + +#if FFX_HLSL_SM>=62 + +/// @defgroup HLSL62Types HLSL 6.2 And Above Types +/// HLSL 6.2 and above type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +/// A typedef for a floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t2 FfxFloat32x2; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t3 FfxFloat32x3; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup HLSL62Types +typedef float32_t4 FfxFloat32x4; + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +typedef float4x4 FfxFloat32x4x4; +typedef float3x4 FfxFloat32x3x4; +typedef float3x3 FfxFloat32x3x3; +typedef float2x2 FfxFloat32x2x2; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t FfxUInt32; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t2 FfxUInt32x2; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t3 FfxUInt32x3; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types +typedef uint32_t4 FfxUInt32x4; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t FfxInt32; + +/// A typedef for a 2-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t2 FfxInt32x2; + +/// A typedef for a 3-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t3 FfxInt32x3; + +/// A typedef for a 4-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types +typedef int32_t4 FfxInt32x4; + +#else // #if FFX_HLSL_SM>=62 + +/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types +/// HLSL 6.1 and below type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +#define FfxFloat32 float +#define FfxFloat32x2 float2 +#define FfxFloat32x3 float3 +#define FfxFloat32x4 float4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x4 float3x4 +#define FfxFloat32x3x3 float3x3 +#define FfxFloat32x2x2 float2x2 + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +typedef uint FfxUInt32; +typedef uint2 FfxUInt32x2; +typedef uint3 FfxUInt32x3; +typedef uint4 FfxUInt32x4; + +typedef int FfxInt32; +typedef int2 FfxInt32x2; +typedef int3 FfxInt32x3; +typedef int4 FfxInt32x4; + +#endif // #if FFX_HLSL_SM>=62 + +#if FFX_HALF + +#if FFX_HLSL_SM >= 62 + +typedef float16_t FfxFloat16; +typedef float16_t2 FfxFloat16x2; +typedef float16_t3 FfxFloat16x3; +typedef float16_t4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef uint16_t FfxUInt16; +typedef uint16_t2 FfxUInt16x2; +typedef uint16_t3 FfxUInt16x3; +typedef uint16_t4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef int16_t FfxInt16; +typedef int16_t2 FfxInt16x2; +typedef int16_t3 FfxInt16x3; +typedef int16_t4 FfxInt16x4; +#elif SHADER_API_PSSL +#pragma argument(realtypes) // Enable true 16-bit types + +typedef half FfxFloat16; +typedef half2 FfxFloat16x2; +typedef half3 FfxFloat16x3; +typedef half4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup GPU +typedef ushort FfxUInt16; +typedef ushort2 FfxUInt16x2; +typedef ushort3 FfxUInt16x3; +typedef ushort4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup GPU +typedef short FfxInt16; +typedef short2 FfxInt16x2; +typedef short3 FfxInt16x3; +typedef short4 FfxInt16x4; +#else // #if FFX_HLSL_SM>=62 +typedef min16float FfxFloat16; +typedef min16float2 FfxFloat16x2; +typedef min16float3 FfxFloat16x3; +typedef min16float4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16uint FfxUInt16; +typedef min16uint2 FfxUInt16x2; +typedef min16uint3 FfxUInt16x3; +typedef min16uint4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup HLSLTypes +typedef min16int FfxInt16; +typedef min16int2 FfxInt16x2; +typedef min16int3 FfxInt16x3; +typedef min16int4 FfxInt16x4; +#endif // #if FFX_HLSL_SM>=62 + +#endif // FFX_HALF + +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#define FfxFloat32Mat4 mat4 +#define FfxFloat32Mat3 mat3 + +/// A typedef for a boolean value. +/// +/// @ingroup GLSLTypes +#define FfxBoolean bool +#define FfxFloat32 float +#define FfxFloat32x2 vec2 +#define FfxFloat32x3 vec3 +#define FfxFloat32x4 vec4 +#define FfxUInt32 uint +#define FfxUInt32x2 uvec2 +#define FfxUInt32x3 uvec3 +#define FfxUInt32x4 uvec4 +#define FfxInt32 int +#define FfxInt32x2 ivec2 +#define FfxInt32x3 ivec3 +#define FfxInt32x4 ivec4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x4 mat4x3 +#define FfxFloat32x3x3 mat3 +#define FfxFloat32x2x2 mat2 + +#if FFX_HALF +#define FfxFloat16 float16_t +#define FfxFloat16x2 f16vec2 +#define FfxFloat16x3 f16vec3 +#define FfxFloat16x4 f16vec4 +#define FfxUInt16 uint16_t +#define FfxUInt16x2 u16vec2 +#define FfxUInt16x3 u16vec3 +#define FfxUInt16x4 u16vec4 +#define FfxInt16 int16_t +#define FfxInt16x2 i16vec2 +#define FfxInt16x3 i16vec3 +#define FfxInt16x4 i16vec4 +#endif // FFX_HALF +#endif // #if defined(FFX_GLSL) + +// Global toggles: +// #define FFX_HALF (1) +// #define FFX_HLSL_SM (62) + +#if FFX_HALF && !defined(SHADER_API_PSSL) + +#if FFX_HLSL_SM >= 62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#else //FFX_HLSL_SM>=62 + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType ); +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); + +#endif //FFX_HLSL_SM>=62 + +#else //FFX_HALF + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#endif //FFX_HALF + +#if defined(FFX_GPU) +// Common typedefs: +#if defined(FFX_HLSL) && !defined(SHADER_API_PSSL) +FFX_MIN16_SCALAR( FFX_MIN16_F , float ); +FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_I, int ); +FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_U, uint ); +FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 ); + +FFX_16BIT_SCALAR( FFX_F16_t , float ); +FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 ); +FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 ); +FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 ); + +FFX_16BIT_SCALAR( FFX_I16_t, int ); +FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 ); +FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 ); +FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 ); + +FFX_16BIT_SCALAR( FFX_U16_t, uint ); +FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 ); +FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 ); +FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 ); + +#define TYPEDEF_MIN16_TYPES(Prefix) \ +typedef FFX_MIN16_F Prefix##_F; \ +typedef FFX_MIN16_F2 Prefix##_F2; \ +typedef FFX_MIN16_F3 Prefix##_F3; \ +typedef FFX_MIN16_F4 Prefix##_F4; \ +typedef FFX_MIN16_I Prefix##_I; \ +typedef FFX_MIN16_I2 Prefix##_I2; \ +typedef FFX_MIN16_I3 Prefix##_I3; \ +typedef FFX_MIN16_I4 Prefix##_I4; \ +typedef FFX_MIN16_U Prefix##_U; \ +typedef FFX_MIN16_U2 Prefix##_U2; \ +typedef FFX_MIN16_U3 Prefix##_U3; \ +typedef FFX_MIN16_U4 Prefix##_U4; + +#define TYPEDEF_16BIT_TYPES(Prefix) \ +typedef FFX_16BIT_F Prefix##_F; \ +typedef FFX_16BIT_F2 Prefix##_F2; \ +typedef FFX_16BIT_F3 Prefix##_F3; \ +typedef FFX_16BIT_F4 Prefix##_F4; \ +typedef FFX_16BIT_I Prefix##_I; \ +typedef FFX_16BIT_I2 Prefix##_I2; \ +typedef FFX_16BIT_I3 Prefix##_I3; \ +typedef FFX_16BIT_I4 Prefix##_I4; \ +typedef FFX_16BIT_U Prefix##_U; \ +typedef FFX_16BIT_U2 Prefix##_U2; \ +typedef FFX_16BIT_U3 Prefix##_U3; \ +typedef FFX_16BIT_U4 Prefix##_U4; + +#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ +typedef FfxFloat32 Prefix##_F; \ +typedef FfxFloat32x2 Prefix##_F2; \ +typedef FfxFloat32x3 Prefix##_F3; \ +typedef FfxFloat32x4 Prefix##_F4; \ +typedef FfxInt32 Prefix##_I; \ +typedef FfxInt32x2 Prefix##_I2; \ +typedef FfxInt32x3 Prefix##_I3; \ +typedef FfxInt32x4 Prefix##_I4; \ +typedef FfxUInt32 Prefix##_U; \ +typedef FfxUInt32x2 Prefix##_U2; \ +typedef FfxUInt32x3 Prefix##_U3; \ +typedef FfxUInt32x4 Prefix##_U4; +#endif // #if defined(FFX_HLSL) + +#if defined(SHADER_API_PSSL) + +#define unorm +#define globallycoherent + +#if FFX_HALF + +#define FFX_MIN16_F half +#define FFX_MIN16_F2 half2 +#define FFX_MIN16_F3 half3 +#define FFX_MIN16_F4 half4 + +#define FFX_MIN16_I short +#define FFX_MIN16_I2 short2 +#define FFX_MIN16_I3 short3 +#define FFX_MIN16_I4 short4 + +#define FFX_MIN16_U ushort +#define FFX_MIN16_U2 ushort2 +#define FFX_MIN16_U3 ushort3 +#define FFX_MIN16_U4 ushort4 + +#define FFX_16BIT_F half +#define FFX_16BIT_F2 half2 +#define FFX_16BIT_F3 half3 +#define FFX_16BIT_F4 half4 + +#define FFX_16BIT_I short +#define FFX_16BIT_I2 short2 +#define FFX_16BIT_I3 short3 +#define FFX_16BIT_I4 short4 + +#define FFX_16BIT_U ushort +#define FFX_16BIT_U2 ushort2 +#define FFX_16BIT_U3 ushort3 +#define FFX_16BIT_U4 ushort4 + +#else // FFX_HALF + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 float2 +#define FFX_MIN16_F3 float3 +#define FFX_MIN16_F4 float4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 int2 +#define FFX_MIN16_I3 int3 +#define FFX_MIN16_I4 int4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uint2 +#define FFX_MIN16_U3 uint3 +#define FFX_MIN16_U4 uint4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 float2 +#define FFX_16BIT_F3 float3 +#define FFX_16BIT_F4 float4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 int2 +#define FFX_16BIT_I3 int3 +#define FFX_16BIT_I4 int4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uint2 +#define FFX_16BIT_U3 uint3 +#define FFX_16BIT_U4 uint4 + +#endif // FFX_HALF + +#endif // #if defined(SHADER_API_PSSL) + +#if defined(FFX_GLSL) + +#if FFX_HALF + +#define FFX_MIN16_F float16_t +#define FFX_MIN16_F2 f16vec2 +#define FFX_MIN16_F3 f16vec3 +#define FFX_MIN16_F4 f16vec4 + +#define FFX_MIN16_I int16_t +#define FFX_MIN16_I2 i16vec2 +#define FFX_MIN16_I3 i16vec3 +#define FFX_MIN16_I4 i16vec4 + +#define FFX_MIN16_U uint16_t +#define FFX_MIN16_U2 u16vec2 +#define FFX_MIN16_U3 u16vec3 +#define FFX_MIN16_U4 u16vec4 + +#define FFX_16BIT_F float16_t +#define FFX_16BIT_F2 f16vec2 +#define FFX_16BIT_F3 f16vec3 +#define FFX_16BIT_F4 f16vec4 + +#define FFX_16BIT_I int16_t +#define FFX_16BIT_I2 i16vec2 +#define FFX_16BIT_I3 i16vec3 +#define FFX_16BIT_I4 i16vec4 + +#define FFX_16BIT_U uint16_t +#define FFX_16BIT_U2 u16vec2 +#define FFX_16BIT_U3 u16vec3 +#define FFX_16BIT_U4 u16vec4 + +#else // FFX_HALF + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 vec2 +#define FFX_MIN16_F3 vec3 +#define FFX_MIN16_F4 vec4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 ivec2 +#define FFX_MIN16_I3 ivec3 +#define FFX_MIN16_I4 ivec4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uvec2 +#define FFX_MIN16_U3 uvec3 +#define FFX_MIN16_U4 uvec4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 vec2 +#define FFX_16BIT_F3 vec3 +#define FFX_16BIT_F4 vec4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 ivec2 +#define FFX_16BIT_I3 ivec3 +#define FFX_16BIT_I4 ivec4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uvec2 +#define FFX_16BIT_U3 uvec3 +#define FFX_16BIT_U4 uvec4 + +#endif // FFX_HALF + +#endif // #if defined(FFX_GLSL) + +#endif // #if defined(FFX_GPU) +#endif // #ifndef FFX_COMMON_TYPES_H diff --git a/Shaders/shaders/ffx_common_types.h.meta b/Shaders/shaders/ffx_common_types.h.meta new file mode 100644 index 0000000..d0f05eb --- /dev/null +++ b/Shaders/shaders/ffx_common_types.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: ec0f8c94ee9930b438b99b82735d181b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_core.h b/Shaders/shaders/ffx_core.h new file mode 100644 index 0000000..d1ed144 --- /dev/null +++ b/Shaders/shaders/ffx_core.h @@ -0,0 +1,80 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup FfxGPU GPU +/// The FidelityFX SDK GPU References +/// +/// @ingroup ffxSDK + +/// @defgroup FfxHLSL HLSL References +/// FidelityFX SDK HLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGLSL GLSL References +/// FidelityFX SDK GLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGPUEffects FidelityFX GPU References +/// FidelityFX Effect GPU Reference Documentation +/// +/// @ingroup FfxGPU + +/// @defgroup GPUCore GPU Core +/// GPU defines and functions +/// +/// @ingroup FfxGPU + +#if !defined(FFX_CORE_H) +#define FFX_CORE_H + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler + +#include "ffx_common_types.h" + +#if defined(FFX_CPU) + #include "ffx_core_cpu.h" +#endif // #if defined(FFX_CPU) + +#if defined(FFX_GLSL) && defined(FFX_GPU) + #include "ffx_core_glsl.h" +#endif // #if defined(FFX_GLSL) && defined(FFX_GPU) + +#if defined(FFX_HLSL) && defined(FFX_GPU) + #include "ffx_core_hlsl.h" +#endif // #if defined(FFX_HLSL) && defined(FFX_GPU) + +#if defined(FFX_GPU) + #include "ffx_core_gpu_common.h" + #include "ffx_core_gpu_common_half.h" + #include "ffx_core_portability.h" +#endif // #if defined(FFX_GPU) + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#endif // #if !defined(FFX_CORE_H) diff --git a/Shaders/shaders/ffx_core.h.meta b/Shaders/shaders/ffx_core.h.meta new file mode 100644 index 0000000..18282d1 --- /dev/null +++ b/Shaders/shaders/ffx_core.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 223ad96bb47790e4d8658dd82ba950f3 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_core_gpu_common.h b/Shaders/shaders/ffx_core_gpu_common.h new file mode 100644 index 0000000..9f88c94 --- /dev/null +++ b/Shaders/shaders/ffx_core_gpu_common.h @@ -0,0 +1,2736 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFX_TRUE (true) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup GPUCore +#define FFX_FALSE (false) + +/// A define value for positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) + +/// A define value for negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) + +/// A define value for PI. +/// +/// @ingroup GPUCore +#define FFX_PI (3.14159) + +FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; + +#define FFX_HAS_FLAG(v, f) ((v & f) == f) + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsSigned(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against for have the sign set. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Convert a 32bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] value The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) +{ + return value ^ ((ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Convert a sortable integer to a 32bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] value The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) +{ + return value ^ ((~ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) +{ + FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); + return b * (-b * value + FfxFloat32(2.0)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) +{ + return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) +{ + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast2(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) +{ + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) +{ + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast3(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) +{ + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) +{ + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) +{ + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast4(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) +{ + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPUCore +FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return dot(a, b); +} + + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); + FfxFloat32 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); + FfxFloat32 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); + FfxFloat32x2 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); + FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); + FfxFloat32x3 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); + FfxFloat32x3 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); + FfxFloat32x4 b4 = b * b * b * b * b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); + FfxFloat32x4 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +// An approximation of sine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate sine for. +// +// @returns +// The approximate sine of value. +FfxFloat32 ffxParabolicSin(FfxFloat32 value) +{ + return value * abs(value) - value; +} + +// An approximation of sine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate sine for. +// +// @returns +// The approximate sine of value. +FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x) +{ + return x * abs(x) - x; +} + +// An approximation of cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of value. +FfxFloat32 ffxParabolicCos(FfxFloat32 x) +{ + x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + x = x * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(x); +} + +// An approximation of cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of value. +FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x) +{ + x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); + x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); + return ffxParabolicSin(x); +} + +// An approximation of both sine and cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// A FfxFloat32x2 containing approximations of both sine and cosine of value. +FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x) +{ + FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + y = y * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(FfxFloat32x2(x, y)); +} + +/// Conditional free logic AND operation using values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) +{ + return x ^ FfxUInt32(1); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) +{ + return x ^ ffxBroadcast2(1u); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) +{ + return x ^ ffxBroadcast3(1u); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) +{ + return x ^ ffxBroadcast4(1u); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) +{ + return FfxUInt32(FfxFloat32(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) +{ + return FfxUInt32x2(ffxBroadcast2(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) +{ + return FfxUInt32x3(ffxBroadcast3(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) +{ + return FfxUInt32x4(ffxBroadcast4(1.0) - x); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) +{ + return FfxFloat32(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) +{ + return ffxBroadcast2(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) +{ + return ffxBroadcast3(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) +{ + return ffxBroadcast4(1.0) - x; +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + FfxFloat32 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + FfxFloat32x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + FfxFloat32x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + FfxFloat32x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power) +{ + return pow(value, FfxFloat32(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast2(power)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) +{ + return pow(value, ffxBroadcast3(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +{ + return pow(color, FfxFloat32(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast2(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast3(power)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxPQFromLinear(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxPQFromLinear(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxPQFromLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) +{ + FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) +{ + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) +{ + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxSrgbFromLinear(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxSrgbFromLinear(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] value The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) +{ + return FfxUInt32x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) +{ + return FfxUInt32x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); +} diff --git a/Shaders/shaders/ffx_core_gpu_common.h.meta b/Shaders/shaders/ffx_core_gpu_common.h.meta new file mode 100644 index 0000000..070d7a5 --- /dev/null +++ b/Shaders/shaders/ffx_core_gpu_common.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 4950d6c78609df549a0ed96137bf3bf1 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_core_gpu_common_half.h b/Shaders/shaders/ffx_core_gpu_common_half.h new file mode 100644 index 0000000..1cb780b --- /dev/null +++ b/Shaders/shaders/ffx_core_gpu_common_half.h @@ -0,0 +1,2981 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if FFX_HALF +#if FFX_HLSL_SM >= 62 +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u) +#else +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPUCore +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPUCore +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u) +#endif // #if FFX_HLSL_SM>=62 + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSqrt(FfxFloat16 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) +{ + return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) +{ + return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) +{ + return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) +{ + return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Convert a 16bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a sortable integer to a 16bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer value to make floating point. +/// +/// @returns +/// The floating point value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a pair of 16bit floating point values to a pair of sortable integers. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point values to make sortable. +/// +/// @returns +/// The sortable integer values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Convert a pair of sortable integers to a pair of 16bit floating point values. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer values to make floating point. +/// +/// @returns +/// The floating point values. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y0 [Zero] X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) +{ + return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y1 [Zero] X1 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y2 [Zero] X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y3 [Zero] X3 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) +{ + return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X0 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X2 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X0 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X0 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X2 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 X2 Y0 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 Y0 X2 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPUCore +FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) +{ + return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte0Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte1Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// r=ffxPermuteUByte2Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// r=ffxPermuteUByte3Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + y = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPUCore +FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) +{ + return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) +{ + FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) +{ + FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X2(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) +{ + FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X3(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) +{ + FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X4(2.0)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) +{ + return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of value. +FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x) +{ + return x * abs(x) - x; +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of value. +FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x) +{ + return x * abs(x) - x; +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of value. +FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of value. +FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75)); + x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of both sine and cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// A FfxFloat32x2 containing approximations of both sine and cosine of value. +FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) +{ + FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + y = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(FfxFloat16x2(x, y)); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) +{ + return x ^ FFX_BROADCAST_UINT16(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) +{ + return x ^ FFX_BROADCAST_UINT16X2(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) +{ + return x ^ FFX_BROADCAST_UINT16X3(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) +{ + return x ^ FFX_BROADCAST_UINT16X4(1); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) +{ + return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) +{ + return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) +{ + return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPUCore +FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) +{ + return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) +{ + return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) +{ + return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) +{ + return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) +{ + return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) - x; +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + FfxFloat16 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + FfxFloat16x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + FfxFloat16x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + FfxFloat16x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPUCore +FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) +{ + return sqrt(c); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(x)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) +{ + return FfxUInt16x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPUCore +FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) +{ + return FfxUInt16x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); +} + +#endif // FFX_HALF diff --git a/Shaders/shaders/ffx_core_gpu_common_half.h.meta b/Shaders/shaders/ffx_core_gpu_common_half.h.meta new file mode 100644 index 0000000..cda0dd4 --- /dev/null +++ b/Shaders/shaders/ffx_core_gpu_common_half.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 93c5f50cadb9ff14cbf03fa7cb1de897 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_core_hlsl.h b/Shaders/shaders/ffx_core_hlsl.h new file mode 100644 index 0000000..28827d9 --- /dev/null +++ b/Shaders/shaders/ffx_core_hlsl.h @@ -0,0 +1,1898 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup HLSLCore HLSL Core +/// HLSL core defines and functions +/// +/// @ingroup FfxHLSL + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +/// A define for abstracting select functionality for pre/post HLSL 21 +/// +/// @ingroup HLSLCore +#if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) select(cond, arg1, arg2) + +#else // #if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) cond ? arg1 : arg2 + +#endif // #if __HLSL_VERSION >= 2021 + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUPSHARED groupshared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) InterlockedAdd(x, y, r) + +/// A define for abstracting compute atomic OR between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_OR(x, y) InterlockedOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MIN(x, y) InterlockedMin(x, y) + +/// A define for abstracting compute atomic max between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MAX(x, y) InterlockedMax(x, y) + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup HLSLCore +#define FFX_STATIC static + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_UNROLL [unroll] + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN(x, y) x > y + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_GREATER_THAN_EQUAL(x, y) x >= y + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN(x, y) x < y + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_LESS_THAN_EQUAL(x, y) x <= y + +/// A define for abstracting an 'equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_EQUAL(x, y) x == y + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// @ingroup HLSLCore +#define FFX_NOT_EQUAL(x, y) x != y + +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MODULO(a, b) (fmod(a, b)) + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X2(x) FfxInt32(x) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X3(x) FfxInt32(x) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_INT32X4(x) FfxInt32(x) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup HLSLCore +#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) + +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. +/// +/// @ingroup HLSLCore +#define ffxF32ToF16 f32tof16 + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value) +{ + return ffxF32ToF16(value.x) | (ffxF32ToF16(value.y) << 16); +} + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (src >> off) & mask; +} + +FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +{ + return (ins & mask) | (src & (~mask)); +} + +FfxUInt32 ffxBitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (ins & mask) | (src & (~mask)); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] x The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return asfloat(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calulate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x) +{ + return rcp(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRsqrt(FfxFloat32 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calulate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x) +{ + return rsqrt(x); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return lerp(x, y, t); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return saturate(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return x - floor(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return round(x); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); + // return min(max(min(y, z), x), max(y, z)); + // return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_I32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSLCore +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + + +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ + return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); +} + +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + +//============================================================================================================================== +// HLSL HALF +//============================================================================================================================== +//============================================================================================================================== +// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). +// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ +FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) +{ + FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); + return FFX_MIN16_F2(t); +} +FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); +} +FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) +{ + FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); + return FFX_MIN16_U2(t); +} +FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) +{ + return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); +} + +FfxUInt32x2 ffxFloat16x4ToUint32x2(FFX_MIN16_F4 v) +{ + FfxUInt32x2 result; + result.x = ffxF32ToF16(v.x) | (ffxF32ToF16(v.y) << 16); + result.y = ffxF32ToF16(v.z) | (ffxF32ToF16(v.w) << 16); + return result; +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = FfxFloat32(sign(v)); + FfxFloat32 s2 = s*s; + return s2/(v + s2 - 1.0); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = FfxFloat32x2(sign(v)); + FfxFloat32x2 s2 = s*s; + return s2/(v + s2 - FfxFloat32x2(1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = FfxFloat32x3(sign(v)); + FfxFloat32x3 s2 = s*s; + return s2/(v + s2 - FfxFloat32x3(1.0, 1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = FfxFloat32x4(sign(v)); + FfxFloat32x4 s2 = s*s; + return s2/(v + s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)); +} + +#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#if FFX_HALF + +#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) +#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) + +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ + return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); +} + +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) +{ + return ffxF32ToF16(x.x) + (ffxF32ToF16(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) +{ + return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw)); +} +FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x) +{ + return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); +} +FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) +{ + return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw)); +} +#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x)) +#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x)) +#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x)) +#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x)) + +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_UINT16(x) asuint16(x) +#define FFX_TO_UINT16X2(x) asuint16(x) +#define FFX_TO_UINT16X3(x) asuint16(x) +#define FFX_TO_UINT16X4(x) asuint16(x) +#else +#define FFX_TO_UINT16(a) FFX_MIN16_U(ffxF32ToF16(FfxFloat32(a))) +#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) +#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) +#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) +#define FFX_TO_FLOAT16(x) asfloat16(x) +#define FFX_TO_FLOAT16X2(x) asfloat16(x) +#define FFX_TO_FLOAT16X3(x) asfloat16(x) +#define FFX_TO_FLOAT16X4(x) asfloat16(x) +#else +#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a))) +#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) +#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) +#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) + +//============================================================================================================================== +#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a) + +//------------------------------------------------------------------------------------------------------------------------------ +#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a) + +//============================================================================================================================== +FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a) +{ + return FFX_MIN16_U(abs(FFX_MIN16_I(a))); +} +FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a) +{ + return FFX_MIN16_U2(abs(FFX_MIN16_I2(a))); +} +FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a) +{ + return FFX_MIN16_U3(abs(FFX_MIN16_I3(a))); +} +FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a) +{ + return FFX_MIN16_U4(abs(FFX_MIN16_I4(a))); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m) +{ + return max(n, min(x, m)); +} +FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m) +{ + return max(n, min(x, m)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// V_FRACT_F16 (note DX frac() is different). +FFX_MIN16_F ffxFract(FFX_MIN16_F x) +{ + return x - floor(x); +} +FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x) +{ + return x - floor(x); +} +FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x) +{ + return x - floor(x); +} +FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x) +{ + return x - floor(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a) +{ + return lerp(x, y, a); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a) +{ + return lerp(x, y, a); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(x, max(y, z)); +} +FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return min(x, min(y, z)); +} +FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return min(x, min(y, z)); +} +FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return min(x, min(y, z)); +} +FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) +{ + return rcp(x); +} +FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x) +{ + return rcp(x); +} +FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x) +{ + return rcp(x); +} +FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x) +{ + return rcp(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x) +{ + return rsqrt(x); +} +FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x) +{ + return rsqrt(x); +} +FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x) +{ + return rsqrt(x); +} +FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x) +{ + return rsqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxSaturate(FFX_MIN16_F x) +{ + return saturate(x); +} +FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x) +{ + return saturate(x); +} +FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x) +{ + return saturate(x); +} +FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x) +{ + return saturate(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b) +{ + return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b)); +} +FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b) +{ + return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b)); +} +FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b) +{ + return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b)); +} +FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b) +{ + return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b)); +} +#endif // FFX_HALF + +//============================================================================================================================== +// HLSL WAVE +//============================================================================================================================== +#if defined(FFX_WAVE) +// Where 'x' must be a compile time literal. +FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x2 ffxWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x3 ffxWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x4 ffxWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxBoolean ffxWaveIsFirstLane() +{ + return WaveIsFirstLane(); +} +FfxUInt32 ffxWaveLaneIndex() +{ + return WaveGetLaneIndex(); +} +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, x); +} +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) +{ + return WavePrefixCountBits(v); +} +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) +{ + return WaveActiveCountBits(v); +} +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) +{ + return WaveReadLaneFirst(v); +} +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32 ffxWaveOr(FfxUInt32 a) +{ + return WaveActiveBitOr(a); +} +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return WaveActiveMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) +{ + return WaveActiveMin(a); +} +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return WaveActiveMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) +{ + return WaveActiveMax(a); +} +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return WaveActiveSum(a); +} +FfxFloat32 ffxWaveSum(FfxFloat32 a) +{ + return WaveActiveSum(a); +} +FfxUInt32 ffxWaveLaneCount() +{ + return WaveGetLaneCount(); +} +FfxBoolean ffxWaveAllTrue(FfxBoolean v) +{ + return WaveActiveAllTrue(v); +} +FfxFloat32 ffxQuadReadX(FfxFloat32 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32 ffxQuadReadY(FfxFloat32 v) +{ + return QuadReadAcrossY(v); +} +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) +{ + return QuadReadAcrossY(v); +} + +#if FFX_HALF +FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +#endif // FFX_HALF +#endif // #if defined(FFX_WAVE) diff --git a/Shaders/shaders/ffx_core_hlsl.h.meta b/Shaders/shaders/ffx_core_hlsl.h.meta new file mode 100644 index 0000000..7c889bd --- /dev/null +++ b/Shaders/shaders/ffx_core_hlsl.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 47a04cb434355164b8da169fbd474688 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_core_portability.h b/Shaders/shaders/ffx_core_portability.h new file mode 100644 index 0000000..12147b9 --- /dev/null +++ b/Shaders/shaders/ffx_core_portability.h @@ -0,0 +1,46 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void ffxOpAAddOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a + ffxBroadcast3(b); +} + +void ffxOpACpyF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = a; +} + +void ffxOpAMulF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d = a * b; +} + +void ffxOpAMulOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a * b; +} + +void ffxOpARcpF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = ffxReciprocal(a); +} diff --git a/Shaders/shaders/ffx_core_portability.h.meta b/Shaders/shaders/ffx_core_portability.h.meta new file mode 100644 index 0000000..2f87125 --- /dev/null +++ b/Shaders/shaders/ffx_core_portability.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 9dc8a5ba90431934588adf0dabbf0cdd +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl b/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl new file mode 100644 index 0000000..59cfd80 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl @@ -0,0 +1,79 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 +#else +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#endif +#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR2_BIND_SRV_LOCK_STATUS 4 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +#define FSR2_BIND_SRV_LANCZOS_LUT 6 +#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 7 +#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS 8 +#define FSR2_BIND_SRV_AUTO_EXPOSURE 9 +#define FSR2_BIND_SRV_LUMA_HISTORY 10 + +#define FSR2_BIND_UAV_INTERNAL_UPSCALED 0 +#define FSR2_BIND_UAV_LOCK_STATUS 1 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 +#define FSR2_BIND_UAV_NEW_LOCKS 3 +#define FSR2_BIND_UAV_LUMA_HISTORY 4 + +#define FSR2_BIND_CB_FSR2 0 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_sample.h" +#include "fsr2/ffx_fsr2_upsample.h" +#include "fsr2/ffx_fsr2_postprocess_lock_status.h" +#include "fsr2/ffx_fsr2_reproject.h" +#include "fsr2/ffx_fsr2_accumulate.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT; + uGroupId.y = GroupRows - uGroupId.y - 1; + + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + Accumulate(uDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl.meta new file mode 100644 index 0000000..a5a43a9 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_accumulate_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 2587b11e2d882c649ac7fd1a51f6dc8c +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl b/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl new file mode 100644 index 0000000..1a5995b --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl @@ -0,0 +1,78 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 + +#define FSR2_BIND_UAV_AUTOREACTIVE 0 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_REACTIVE 1 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + float3 ColorPreAlpha = LoadOpaqueOnly( FFX_MIN16_I2(uDispatchThreadId) ).rgb; + float3 ColorPostAlpha = LoadInputColor(uDispatchThreadId).rgb; + + if (GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) + { + ColorPreAlpha = Tonemap(ColorPreAlpha); + ColorPostAlpha = Tonemap(ColorPostAlpha); + } + + if (GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) + { + ColorPreAlpha = InverseTonemap(ColorPreAlpha); + ColorPostAlpha = InverseTonemap(ColorPostAlpha); + } + + float out_reactive_value = 0.f; + float3 delta = abs(ColorPostAlpha - ColorPreAlpha); + + out_reactive_value = (GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= GenReactiveScale(); + + out_reactive_value = (GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < GenReactiveThreshold() ? 0 : GenReactiveBinaryValue()) : out_reactive_value; + + rw_output_autoreactive[uDispatchThreadId] = out_reactive_value; +} diff --git a/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl.meta new file mode 100644 index 0000000..776a7af --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_autogen_reactive_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 15c74efa821aaf3429f92b8d5d9901d5 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl b/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl new file mode 100644 index 0000000..31de05f --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl @@ -0,0 +1,56 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_COLOR 0 + +#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 0 +#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 +#define FSR2_BIND_UAV_EXPOSURE_MIP_5 2 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 3 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_SPD 1 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_compute_luminance_pyramid.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeAutoExposure(WorkGroupId, LocalThreadIndex); +} diff --git a/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..8ed0c35 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 630d959ddf3e02e4087e181b7d7cd7ed +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_debug_blit.hlsl b/Shaders/shaders/ffx_fsr2_debug_blit.hlsl new file mode 100644 index 0000000..4751b8b --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_debug_blit.hlsl @@ -0,0 +1,49 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +Texture2D r_in : register(t0); +RWTexture2D rw_out : register(u0); + +cbuffer cbBlit0 : register(b0) { + + int g_outChannelRed; + int g_outChannelGreen; + int g_outChannelBlue; + float2 outChannelRedMinMax; + float2 outChannelGreenMinMax; + float2 outChannelBlueMinMax; +}; + +[numthreads(8, 8, 1)] +void CS(uint3 globalID : SV_DispatchThreadID) +{ + float4 srcColor = r_in[globalID.xy]; + + // remap channels + float4 dstColor = float4(srcColor[g_outChannelRed], srcColor[g_outChannelGreen], srcColor[g_outChannelBlue], 1.f); + + // apply offset and scale + dstColor.rgb -= float3(outChannelRedMinMax.x, outChannelGreenMinMax.x, outChannelBlueMinMax.x); + dstColor.rgb /= float3(outChannelRedMinMax.y - outChannelRedMinMax.x, outChannelGreenMinMax.y - outChannelGreenMinMax.x, outChannelBlueMinMax.y - outChannelBlueMinMax.x); + + rw_out[globalID.xy] = float4(dstColor); +} diff --git a/Shaders/shaders/ffx_fsr2_debug_blit.hlsl.meta b/Shaders/shaders/ffx_fsr2_debug_blit.hlsl.meta new file mode 100644 index 0000000..df33f76 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_debug_blit.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 8302faae2234907489de5569919fc172 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl b/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl new file mode 100644 index 0000000..30bbcc0 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl @@ -0,0 +1,67 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR2_BIND_SRV_DILATED_DEPTH 2 +#define FSR2_BIND_SRV_REACTIVE_MASK 3 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 5 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 6 +#define FSR2_BIND_SRV_INPUT_COLOR 7 +#define FSR2_BIND_SRV_INPUT_DEPTH 8 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 9 + +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 0 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 1 + +#define FSR2_BIND_CB_FSR2 0 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_sample.h" +#include "fsr2/ffx_fsr2_depth_clip.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS( + int2 iGroupId : SV_GroupID, + int2 iDispatchThreadId : SV_DispatchThreadID, + int2 iGroupThreadId : SV_GroupThreadID, + int iGroupIndex : SV_GroupIndex) +{ + DepthClip(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl.meta new file mode 100644 index 0000000..794fe20 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_depth_clip_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 38a518d5929d9134096f87f66f59a285 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_lock_pass.hlsl b/Shaders/shaders/ffx_fsr2_lock_pass.hlsl new file mode 100644 index 0000000..dec17fe --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_lock_pass.hlsl @@ -0,0 +1,56 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 + +#define FSR2_BIND_UAV_NEW_LOCKS 0 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 1 + +#define FSR2_BIND_CB_FSR2 0 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_sample.h" +#include "fsr2/ffx_fsr2_lock.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + ComputeLock(uDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr2_lock_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_lock_pass.hlsl.meta new file mode 100644 index 0000000..7dcfaed --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_lock_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: c3bf93953aee91541a1be089c2ed268f +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl b/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl new file mode 100644 index 0000000..6fd6d29 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl @@ -0,0 +1,54 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR2_BIND_SRV_RCAS_INPUT 1 + +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 0 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_RCAS 1 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_rcas.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) +{ + RCAS(LocalThreadId, WorkGroupId, Dtid); +} diff --git a/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl.meta new file mode 100644 index 0000000..b1198a1 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_rcas_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 43f9d03e57211f8478e3388eecb8d5f8 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl b/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl new file mode 100644 index 0000000..7e17318 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl @@ -0,0 +1,64 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_INPUT_DEPTH 1 +#define FSR2_BIND_SRV_INPUT_COLOR 2 +#define FSR2_BIND_SRV_INPUT_EXPOSURE 3 + +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 1 +#define FSR2_BIND_UAV_DILATED_DEPTH 2 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 3 + +#define FSR2_BIND_CB_FSR2 0 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_sample.h" +#include "fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS( + int2 iGroupId : SV_GroupID, + int2 iDispatchThreadId : SV_DispatchThreadID, + int2 iGroupThreadId : SV_GroupThreadID, + int iGroupIndex : SV_GroupIndex +) +{ + ReconstructAndDilate(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta new file mode 100644 index 0000000..622615b --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: c7220f38e87579a4788ddaabba0a6d97 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl b/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl new file mode 100644 index 0000000..765cc8b --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl @@ -0,0 +1,92 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR2_BIND_SRV_INPUT_COLOR 1 +#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR2_BIND_SRV_REACTIVE_MASK 4 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 5 + +#define FSR2_BIND_UAV_AUTOREACTIVE 0 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 1 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 2 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 3 + +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_AUTOREACTIVE 1 + +#include "fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "fsr2/ffx_fsr2_common.h" +#include "fsr2/ffx_fsr2_tcr_autogen.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId); + + // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = 0; + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(ReactiveScale()); + outReactiveMask.x = outReactiveMask.x < ReactiveMax() ? outReactiveMask.x : FFX_MIN16_F(ReactiveMax()); + } + + outReactiveMask.y *= FFX_MIN16_F( TcScale() ); + + outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) ); + outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) ); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta b/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta new file mode 100644 index 0000000..b6fd348 --- /dev/null +++ b/Shaders/shaders/ffx_fsr2_tcr_autogen_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: d96e2a0e75dd01d47a231e3c4d18be0c +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl new file mode 100644 index 0000000..c000624 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl @@ -0,0 +1,70 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#else +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 +#endif +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 +#define FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 8 + +#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED 0 +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 1 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 2 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_sample.h" +#include "fsr3upscaler/ffx_fsr3upscaler_upsample.h" +#include "fsr3upscaler/ffx_fsr3upscaler_reproject.h" +#include "fsr3upscaler/ffx_fsr3upscaler_accumulate.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + Accumulate(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta new file mode 100644 index 0000000..5cdfe03 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 28932314459baa6449098e1b5862470c +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl new file mode 100644 index 0000000..c052a1d --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl @@ -0,0 +1,78 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 + +#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 0 +#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION 1 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_REACTIVE 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + float3 ColorPreAlpha = LoadOpaqueOnly( FFX_MIN16_I2(uDispatchThreadId) ).rgb; + float3 ColorPostAlpha = LoadInputColor(uDispatchThreadId).rgb; + + if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP) + { + ColorPreAlpha = Tonemap(ColorPreAlpha); + ColorPostAlpha = Tonemap(ColorPostAlpha); + } + + if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) + { + ColorPreAlpha = InverseTonemap(ColorPreAlpha); + ColorPostAlpha = InverseTonemap(ColorPostAlpha); + } + + float out_reactive_value = 0.f; + float3 delta = abs(ColorPostAlpha - ColorPreAlpha); + + out_reactive_value = (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= GenReactiveScale(); + + out_reactive_value = (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < GenReactiveThreshold() ? 0 : GenReactiveBinaryValue()) : out_reactive_value; + + rw_output_autoreactive[uDispatchThreadId] = out_reactive_value; +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta new file mode 100644 index 0000000..0eb056a --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 0731dfbd04968084582397e48795d9d8 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl new file mode 100644 index 0000000..6ac5067 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl @@ -0,0 +1,56 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 4 + +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_debug_view.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(FfxInt32x2 iPxPos : SV_DispatchThreadID) +{ + DebugView(iPxPos); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta new file mode 100644 index 0000000..2a75154 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_debug_view_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 00c8328353e3dba449b5e3855add5a35 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl new file mode 100644 index 0000000..44d97b3 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl @@ -0,0 +1,59 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_FRAME_INFO 3 +#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 + +#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY 0 +#define FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY 1 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_luma_instability.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + LumaInstability(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta new file mode 100644 index 0000000..7e261f7 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_luma_instability_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 68eed10cb1a8742438be0ba0c08d5b05 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl new file mode 100644 index 0000000..37fa1b7 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl @@ -0,0 +1,62 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH 1 + +#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 0 +#define FSR3UPSCALER_BIND_UAV_FRAME_INFO 1 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 2 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 3 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 6 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 7 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1 8 + + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_SPD 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeAutoExposure(WorkGroupId, LocalThreadIndex); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..71ca10c --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 10b66892c68bbe1408580a330cccb381 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl new file mode 100644 index 0000000..d40b7d2 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl @@ -0,0 +1,58 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 1 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 2 + +#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS 0 +#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH 1 +#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH 3 +#define FSR3UPSCALER_BIND_UAV_CURRENT_LUMA 4 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + PrepareInputs(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta new file mode 100644 index 0000000..a56ca2a --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_prepare_inputs_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: f6bb10da6010f8a479c9560de12f92f2 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl new file mode 100644 index 0000000..b09e9b8 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl @@ -0,0 +1,62 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 3 +#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR3UPSCALER_BIND_SRV_ACCUMULATION 5 +#define FSR3UPSCALER_BIND_SRV_SHADING_CHANGE 6 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 8 + +#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS 0 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 1 +#define FSR3UPSCALER_BIND_UAV_ACCUMULATION 2 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + PrepareReactivity(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta new file mode 100644 index 0000000..6eb9fe3 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_prepare_reactivity_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 0ac3112d79ef65e4eb6f4787ffe37fdd +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl new file mode 100644 index 0000000..2963562 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl @@ -0,0 +1,53 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT 1 + +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 0 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_RCAS 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_rcas.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) +{ + RCAS(LocalThreadId, WorkGroupId, Dtid); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta new file mode 100644 index 0000000..10533cf --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 3e953864b9b462f468b64a78463b84aa +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl new file mode 100644 index 0000000..ab2b545 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl @@ -0,0 +1,52 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_SPD_MIPS 0 + +#define FSR3UPSCALER_BIND_UAV_SHADING_CHANGE 0 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_shading_change.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_PREFER_WAVE64 +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(int2 iDispatchThreadId : SV_DispatchThreadID) +{ + ShadingChange(iDispatchThreadId); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta new file mode 100644 index 0000000..e8c5fe8 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 503052090415bdf4c9307ea6c5aa94b6 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl new file mode 100644 index 0000000..5403792 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl @@ -0,0 +1,63 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 3 + + +#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 0 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 1 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 2 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 3 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 6 + + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_SPD 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeShadingChangePyramid(WorkGroupId, LocalThreadIndex); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..1694ed4 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 8a6fef31a72aae649803d98a0ced428d +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl b/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl new file mode 100644 index 0000000..8a3c882 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl @@ -0,0 +1,90 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR 3 +#define FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR 4 +#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 5 +#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6 + +#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 0 +#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION 1 +#define FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR 2 +#define FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR 3 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 +#define FSR3UPSCALER_BIND_CB_AUTOREACTIVE 1 + +#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId); + + // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV + // fetch pre- and post-alpha color values + FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); + FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); + FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); + + FFX_MIN16_F3 colorPreAlpha = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) ); + FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); + + FFX_MIN16_F2 outReactiveMask = 0; + + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); + + if (outReactiveMask.y > 0.5f) + { + outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); + outReactiveMask.x *= FFX_MIN16_F(fReactiveScale); + outReactiveMask.x = outReactiveMask.x < fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( fReactiveMax ); + } + + outReactiveMask.y *= FFX_MIN16_F(fTcScale ); + + outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) ); + outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) ); + + StoreAutoReactive(uDispatchThreadId, outReactiveMask); + + StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); + StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha); +} diff --git a/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta b/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta new file mode 100644 index 0000000..c6923a5 --- /dev/null +++ b/Shaders/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 877b3428f6073db41b9fc80bb6c6d4d7 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr1.meta b/Shaders/shaders/fsr1.meta new file mode 100644 index 0000000..e52d0ac --- /dev/null +++ b/Shaders/shaders/fsr1.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: dafd228fb8a2f4e478fe1df6d38589f9 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr1/ffx_fsr1.h b/Shaders/shaders/fsr1/ffx_fsr1.h new file mode 100644 index 0000000..82ebf21 --- /dev/null +++ b/Shaders/shaders/fsr1/ffx_fsr1.h @@ -0,0 +1,1252 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup FfxGPUFsr1 FidelityFX FSR1 +/// FidelityFX Super Resolution 1 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for EASU (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension. +/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension. +/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension. +/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension. +/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. +/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. +/// +/// @ingroup FfxGPUFsr1 +FFX_STATIC void ffxFsrPopulateEasuConstants( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY) +{ + // Output integer position to a pixel position in viewport. + con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX)); + con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY)); + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5)); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5)); + + // Viewport pixel position to normalized image space. + // This is used to get upper-left of 'F' tap. + con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX)); + con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY)); + + // Centers of gather4, first offset from upper-left of 'F'. + // +---+---+ + // | | | + // +--(0)--+ + // | b | c | + // +---F---+---+---+ + // | e | f | g | h | + // +--(1)--+--(2)--+ + // | i | j | k | l | + // +---+---+---+---+ + // | n | o | + // +--(3)--+ + // | | | + // +---+---+ + con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY)); + + // These are from (0) instead of 'F'. + con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX)); + con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[2] = con3[3] = 0; +} + +/// Setup required constant values for EASU (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension. +/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension. +/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension. +/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension. +/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension. +/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension. +/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution). +/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). +/// +/// @ingroup FfxGPUFsr1 +FFX_STATIC void ffxFsrPopulateEasuConstantsOffset( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY) +{ + ffxFsrPopulateEasuConstants( + con0, + con1, + con2, + con3, + inputViewportInPixelsX, + inputViewportInPixelsY, + inputSizeInPixelsX, + inputSizeInPixelsY, + outputSizeInPixelsX, + outputSizeInPixelsY); + + // override + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY); +} + +#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p); + +// Filtering for a given tap for the scalar. +void fsrEasuTapFloat( + FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor, // Accumulated color, with negative lobe. + FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight, // Accumulated weight. + FFX_PARAMETER_IN FfxFloat32x2 pixelOffset, // Pixel offset from resolve position to tap. + FFX_PARAMETER_IN FfxFloat32x2 gradientDirection, // Gradient direction. + FFX_PARAMETER_IN FfxFloat32x2 length, // Length. + FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength, // Negative lobe strength. + FFX_PARAMETER_IN FfxFloat32 clippingPoint, // Clipping point. + FFX_PARAMETER_IN FfxFloat32x3 color) // Tap color. +{ + // Rotate offset by direction. + FfxFloat32x2 rotatedOffset; + rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y); + rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x); + + // Anisotropy. + rotatedOffset *= length; + + // Compute distance^2. + FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y; + + // Limit to the window as at corner, 2 taps can easily be outside. + distanceSquared = ffxMin(distanceSquared, clippingPoint); + + // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0); + FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0); + weightB *= weightB; + weightA *= weightA; + weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0)); + FfxFloat32 weight = weightB * weightA; + + // Do weighted average. + accumulatedColor += color * weight; + accumulatedWeight += weight; +} + +// Accumulate direction and length. +void fsrEasuSetFloat( + FFX_PARAMETER_INOUT FfxFloat32x2 direction, + FFX_PARAMETER_INOUT FfxFloat32 length, + FFX_PARAMETER_IN FfxFloat32x2 pp, + FFX_PARAMETER_IN FfxBoolean biS, + FFX_PARAMETER_IN FfxBoolean biT, + FFX_PARAMETER_IN FfxBoolean biU, + FFX_PARAMETER_IN FfxBoolean biV, + FFX_PARAMETER_IN FfxFloat32 lA, + FFX_PARAMETER_IN FfxFloat32 lB, + FFX_PARAMETER_IN FfxFloat32 lC, + FFX_PARAMETER_IN FfxFloat32 lD, + FFX_PARAMETER_IN FfxFloat32 lE) +{ + // Compute bilinear weight, branches factor out as predicates are compiler time immediates. + // s t + // u v + FfxFloat32 weight = FfxFloat32(0.0); + if (biS) + weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y); + if (biT) + weight = pp.x * (FfxFloat32(1.0) - pp.y); + if (biU) + weight = (FfxFloat32(1.0) - pp.x) * pp.y; + if (biV) + weight = pp.x * pp.y; + + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes magnitude from abs average of both sides of 'c'. + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + FfxFloat32 dc = lD - lC; + FfxFloat32 cb = lC - lB; + FfxFloat32 lengthX = max(abs(dc), abs(cb)); + lengthX = ffxApproximateReciprocal(lengthX); + FfxFloat32 directionX = lD - lB; + direction.x += directionX * weight; + lengthX = ffxSaturate(abs(directionX) * lengthX); + lengthX *= lengthX; + length += lengthX * weight; + + // Repeat for the y axis. + FfxFloat32 ec = lE - lC; + FfxFloat32 ca = lC - lA; + FfxFloat32 lengthY = max(abs(ec), abs(ca)); + lengthY = ffxApproximateReciprocal(lengthY); + FfxFloat32 directionY = lE - lA; + direction.y += directionY * weight; + lengthY = ffxSaturate(abs(directionY) * lengthY); + lengthY *= lengthY; + length += lengthY * weight; +} + +/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations. +/// +/// @param [out] outPixel The computed color of a pixel. +/// @param [in] integerPosition Integer pixel position within the output. +/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants. +/// +/// @ingroup FSR +void ffxFsrEasuFloat( + FFX_PARAMETER_OUT FfxFloat32x3 pix, + FFX_PARAMETER_IN FfxUInt32x2 ip, + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + // Get position of 'f'. + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, + // a b <- unused (z) + // r g + // a b a b + // r g r g + // a b + // r g <- unused (z) + // Allowing dead-code removal to remove the 'z's. + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + + // These are from p0 to avoid pulling two constants on pre-Navi hardware. + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat32x4 bczzR = FsrEasuRF(p0); + FfxFloat32x4 bczzG = FsrEasuGF(p0); + FfxFloat32x4 bczzB = FsrEasuBF(p0); + FfxFloat32x4 ijfeR = FsrEasuRF(p1); + FfxFloat32x4 ijfeG = FsrEasuGF(p1); + FfxFloat32x4 ijfeB = FsrEasuBF(p1); + FfxFloat32x4 klhgR = FsrEasuRF(p2); + FfxFloat32x4 klhgG = FsrEasuGF(p2); + FfxFloat32x4 klhgB = FsrEasuBF(p2); + FfxFloat32x4 zzonR = FsrEasuRF(p3); + FfxFloat32x4 zzonG = FsrEasuGF(p3); + FfxFloat32x4 zzonB = FsrEasuBF(p3); + + // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). + FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG); + FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG); + FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG); + FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG); + + // Rename. + FfxFloat32 bL = bczzL.x; + FfxFloat32 cL = bczzL.y; + FfxFloat32 iL = ijfeL.x; + FfxFloat32 jL = ijfeL.y; + FfxFloat32 fL = ijfeL.z; + FfxFloat32 eL = ijfeL.w; + FfxFloat32 kL = klhgL.x; + FfxFloat32 lL = klhgL.y; + FfxFloat32 hL = klhgL.z; + FfxFloat32 gL = klhgL.w; + FfxFloat32 oL = zzonL.z; + FfxFloat32 nL = zzonL.w; + + // Accumulate for bilinear interpolation. + FfxFloat32x2 dir = ffxBroadcast2(0.0); + FfxFloat32 len = FfxFloat32(0.0); + fsrEasuSetFloat(dir, len, pp, FFX_TRUE, FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE, FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE, FFX_FALSE, fL, iL, jL, kL, nL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE, gL, jL, kL, lL, oL); + + // Normalize with approximation, and cleanup close to zero. + FfxFloat32x2 dir2 = dir * dir; + FfxFloat32 dirR = dir2.x + dir2.y; + FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0); + dirR = ffxApproximateReciprocalSquareRoot(dirR); + dirR = zro ? FfxFloat32(1.0) : dirR; + dir.x = zro ? FfxFloat32(1.0) : dir.x; + dir *= ffxBroadcast2(dirR); + + // Transform from {0 to 2} to {0 to 1} range, and shape with square. + len = len * FfxFloat32(0.5); + len *= len; + + // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. + FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y))); + + // Anisotropic length after rotation, + // x := 1.0 lerp to 'stretch' on edges + // y := 1.0 lerp to 2x on edges + FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len); + + // Based on the amount of 'edge', + // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. + FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len; + + // Set distance^2 clipping point to the end of the adjustable window. + FfxFloat32 clp = ffxApproximateReciprocal(lob); + + // Accumulation mixed with min/max of 4 nearest. + // b c + // e f g h + // i j k l + // n o + FfxFloat32x3 min4 = + ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), + FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + FfxFloat32x3 max4 = + max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + + // Accumulation. + FfxFloat32x3 aC = ffxBroadcast3(0.0); + FfxFloat32 aW = FfxFloat32(0.0); + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n + + // Normalize and dering. + pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(ffxReciprocal(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) + +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p); + +// This runs 2 taps in parallel. +void FsrEasuTapH( + FFX_PARAMETER_INOUT FfxFloat16x2 aCR, + FFX_PARAMETER_INOUT FfxFloat16x2 aCG, + FFX_PARAMETER_INOUT FfxFloat16x2 aCB, + FFX_PARAMETER_INOUT FfxFloat16x2 aW, + FFX_PARAMETER_IN FfxFloat16x2 offX, + FFX_PARAMETER_IN FfxFloat16x2 offY, + FFX_PARAMETER_IN FfxFloat16x2 dir, + FFX_PARAMETER_IN FfxFloat16x2 len, + FFX_PARAMETER_IN FfxFloat16 lob, + FFX_PARAMETER_IN FfxFloat16 clp, + FFX_PARAMETER_IN FfxFloat16x2 cR, + FFX_PARAMETER_IN FfxFloat16x2 cG, + FFX_PARAMETER_IN FfxFloat16x2 cB) +{ + FfxFloat16x2 vX, vY; + vX = offX * dir.xx + offY * dir.yy; + vY = offX * (-dir.yy) + offY * dir.xx; + vX *= len.x; + vY *= len.y; + FfxFloat16x2 d2 = vX * vX + vY * vY; + d2 = min(d2, FFX_BROADCAST_FLOAT16X2(clp)); + FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + wB *= wB; + wA *= wA; + wB = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0)); + FfxFloat16x2 w = wB * wA; + aCR += cR * w; + aCG += cG * w; + aCB += cB * w; + aW += w; +} + +// This runs 2 taps in parallel. +void FsrEasuSetH( + FFX_PARAMETER_INOUT FfxFloat16x2 dirPX, + FFX_PARAMETER_INOUT FfxFloat16x2 dirPY, + FFX_PARAMETER_INOUT FfxFloat16x2 lenP, + FFX_PARAMETER_IN FfxFloat16x2 pp, + FFX_PARAMETER_IN FfxBoolean biST, + FFX_PARAMETER_IN FfxBoolean biUV, + FFX_PARAMETER_IN FfxFloat16x2 lA, + FFX_PARAMETER_IN FfxFloat16x2 lB, + FFX_PARAMETER_IN FfxFloat16x2 lC, + FFX_PARAMETER_IN FfxFloat16x2 lD, + FFX_PARAMETER_IN FfxFloat16x2 lE) +{ + FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0); + + if (biST) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y); + + if (biUV) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y); + + // ABS is not free in the packed FP16 path. + FfxFloat16x2 dc = lD - lC; + FfxFloat16x2 cb = lC - lB; + FfxFloat16x2 lenX = max(abs(dc), abs(cb)); + lenX = ffxReciprocalHalf(lenX); + + FfxFloat16x2 dirX = lD - lB; + dirPX += dirX * w; + lenX = ffxSaturate(abs(dirX) * lenX); + lenX *= lenX; + lenP += lenX * w; + FfxFloat16x2 ec = lE - lC; + FfxFloat16x2 ca = lC - lA; + FfxFloat16x2 lenY = max(abs(ec), abs(ca)); + lenY = ffxReciprocalHalf(lenY); + FfxFloat16x2 dirY = lE - lA; + dirPY += dirY * w; + lenY = ffxSaturate(abs(dirY) * lenY); + lenY *= lenY; + lenP += lenY * w; +} + +void FsrEasuH( + FFX_PARAMETER_OUT FfxFloat16x3 pix, + FFX_PARAMETER_IN FfxUInt32x2 ip, + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + FfxFloat16x2 ppp = FfxFloat16x2(pp); + + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat16x4 bczzR = FsrEasuRH(p0); + FfxFloat16x4 bczzG = FsrEasuGH(p0); + FfxFloat16x4 bczzB = FsrEasuBH(p0); + FfxFloat16x4 ijfeR = FsrEasuRH(p1); + FfxFloat16x4 ijfeG = FsrEasuGH(p1); + FfxFloat16x4 ijfeB = FsrEasuBH(p1); + FfxFloat16x4 klhgR = FsrEasuRH(p2); + FfxFloat16x4 klhgG = FsrEasuGH(p2); + FfxFloat16x4 klhgB = FsrEasuBH(p2); + FfxFloat16x4 zzonR = FsrEasuRH(p3); + FfxFloat16x4 zzonG = FsrEasuGH(p3); + FfxFloat16x4 zzonB = FsrEasuBH(p3); + + FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG); + FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG); + FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG); + FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG); + FfxFloat16 bL = bczzL.x; + FfxFloat16 cL = bczzL.y; + FfxFloat16 iL = ijfeL.x; + FfxFloat16 jL = ijfeL.y; + FfxFloat16 fL = ijfeL.z; + FfxFloat16 eL = ijfeL.w; + FfxFloat16 kL = klhgL.x; + FfxFloat16 lL = klhgL.y; + FfxFloat16 hL = klhgL.z; + FfxFloat16 gL = klhgL.w; + FfxFloat16 oL = zzonL.z; + FfxFloat16 nL = zzonL.w; + + // This part is different, accumulating 2 taps in parallel. + FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 lenP = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(true), + FfxBoolean(false), + FfxFloat16x2(bL, cL), + FfxFloat16x2(eL, fL), + FfxFloat16x2(fL, gL), + FfxFloat16x2(gL, hL), + FfxFloat16x2(jL, kL)); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxBoolean(false), + FfxBoolean(true), + FfxFloat16x2(fL, gL), + FfxFloat16x2(iL, jL), + FfxFloat16x2(jL, kL), + FfxFloat16x2(kL, lL), + FfxFloat16x2(nL, oL)); + FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); + FfxFloat16 len = lenP.r + lenP.g; + + FfxFloat16x2 dir2 = dir * dir; + FfxFloat16 dirR = dir2.x + dir2.y; + FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); + dirR = ffxApproximateReciprocalSquareRootHalf(dirR); + dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; + dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x; + dir *= FFX_BROADCAST_FLOAT16X2(dirR); + len = len * FFX_BROADCAST_FLOAT16(0.5); + len *= len; + FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); + FfxFloat16x2 len2 = + FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len); + FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; + FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); + + // FP16 is different, using packed trick to do min and max in same operation. + FfxFloat16x2 bothR = + max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); + FfxFloat16x2 bothG = + max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); + FfxFloat16x2 bothB = + max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); + + // This part is different for FP16, working pairs of taps at a time. + FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); + FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); + FfxFloat16 aW = pW.x + pW.y; + + // Slightly different for FP16 version due to combined min and max. + pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_HALF) && defined(FFX_FSR_EASU_HALF) + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING +// +//------------------------------------------------------------------------------------------------------------------------------ +// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. +// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. +// RCAS also has a built in process to limit sharpening of what it detects as possible noise. +// RCAS sharper does not support scaling, as it should be applied after EASU scaling. +// Pass EASU output straight into RCAS, no color conversions necessary. +//------------------------------------------------------------------------------------------------------------------------------ +// RCAS is based on the following logic. +// RCAS uses a 5 tap filter in a cross pattern (same as CAS), +// w n +// w 1 w for taps w m e +// w s +// Where 'w' is the negative lobe weight. +// output = (w*(n+e+w+s)+m)/(4*w+1) +// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, +// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) +// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) +// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. +// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. +// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps. +// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. +// This stabilizes RCAS. +// RCAS does a simple highpass which is normalized against the local contrast then shaped, +// 0.25 +// 0.25 -1 0.25 +// 0.25 +// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. +// +// GLSL example for the required callbacks : +// +// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));} +// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) +// { +// //do any simple input color conversions here or leave empty if none needed +// } +// +// FsrRcasCon need to be called from the CPU or GPU to set up constants. +// Including a GPU example here, the 'con' value would be stored out to a constant buffer. +// +// FfxUInt32x4 con; +// FsrRcasCon(con, +// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. +// --------------- +// RCAS sharpening supports a CAS-like pass-through alpha via, +// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 +// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. +// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, +// #define FSR_RCAS_DENOISE 1 +//============================================================================================================================== +// This is set at the limit of providing unnatural results for sharpening. +#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// CONSTANT SETUP +//============================================================================================================================== +// Call to setup required constant values (works on CPU or GPU). + FFX_STATIC void FsrRcasCon(FfxUInt32x4 con, + // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. + FfxFloat32 sharpness) + { + // Transform from stops to linear value. + sharpness = exp2(-sharpness); + FfxFloat32x2 hSharp = {sharpness, sharpness}; + con[0] = ffxAsUInt32(sharpness); + con[1] = ffxPackHalf2x16(hSharp); + con[2] = 0; + con[3] = 0; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 32-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&&defined(FSR_RCAS_F) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p); + void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat32 pixG, + out FfxFloat32 pixB, +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat32 pixA, +#endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con) + { // Constant generated by RcasSetup(). + // Algorithm uses minimal 3x3 pixel neighborhood. + // b + // d e f + // h + FfxInt32x2 sp = FfxInt32x2(ip); + FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb; + FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb; +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat32x4 ee = FsrRcasLoadF(sp); + FfxFloat32x3 e = ee.rgb; + pixA = ee.a; +#else + FfxFloat32x3 e = FsrRcasLoadF(sp).rgb; +#endif + FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb; + FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat32 bR = b.r; + FfxFloat32 bG = b.g; + FfxFloat32 bB = b.b; + FfxFloat32 dR = d.r; + FfxFloat32 dG = d.g; + FfxFloat32 dB = d.b; + FfxFloat32 eR = e.r; + FfxFloat32 eG = e.g; + FfxFloat32 eB = e.b; + FfxFloat32 fR = f.r; + FfxFloat32 fG = f.g; + FfxFloat32 fB = f.b; + FfxFloat32 hR = h.r; + FfxFloat32 hG = h.g; + FfxFloat32 hB = h.b; + // Run optional input transform. + FsrRcasInputF(bR, bG, bB); + FsrRcasInputF(dR, dG, dB); + FsrRcasInputF(eR, eG, eB); + FsrRcasInputF(fR, fG, fB); + FsrRcasInputF(hR, hG, hB); + // Luma times 2. + FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG); + FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG); + FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG); + FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG); + FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG); + // Noise detection. + FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL; + nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL))); + nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0); + // Min and max of ring. + FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR); + FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG); + FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB); + FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR); + FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG); + FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB); + // Immediate constants for peak range. + FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R); + FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G); + FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B); + FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); + FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); + FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); + FfxFloat32 lobeR = max(-hitMinR, hitMaxR); + FfxFloat32 lobeG = max(-hitMinG, hitMaxG); + FfxFloat32 lobeB = max(-hitMinB, hitMaxB); + FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat + (con.x); + // Apply noise removal. +#ifdef FSR_RCAS_DENOISE + lobe *= nz; +#endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); + pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL; + pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL; + pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL; + return; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p); + void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasH( + out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat16 pixG, + out FfxFloat16 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // Sharpening algorithm uses minimal 3x3 pixel neighborhood. + // b + // d e f + // h + FfxInt16x2 sp=FfxInt16x2(ip); + FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee=FsrRcasLoadH(sp); + FfxFloat16x3 e=ee.rgb;pixA=ee.a; + #else + FfxFloat16x3 e=FsrRcasLoadH(sp).rgb; + #endif + FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat16 bR=b.r; + FfxFloat16 bG=b.g; + FfxFloat16 bB=b.b; + FfxFloat16 dR=d.r; + FfxFloat16 dG=d.g; + FfxFloat16 dB=d.b; + FfxFloat16 eR=e.r; + FfxFloat16 eG=e.g; + FfxFloat16 eB=e.b; + FfxFloat16 fR=f.r; + FfxFloat16 fG=f.g; + FfxFloat16 fB=f.b; + FfxFloat16 hR=h.r; + FfxFloat16 hG=h.g; + FfxFloat16 hB=h.b; + // Run optional input transform. + FsrRcasInputH(bR,bG,bB); + FsrRcasInputH(dR,dG,dB); + FsrRcasInputH(eR,eG,eB); + FsrRcasInputH(fR,fG,fB); + FsrRcasInputH(hR,hG,hB); + // Luma times 2. + FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG); + FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG); + FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG); + FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG); + FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG); + // Noise detection. + FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0); + // Min and max of ring. + FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R); + FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G); + FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B); + FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); + FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); + FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); + FfxFloat16 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x; + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; +} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2) + // Input callback prototypes that need to be implemented by the calling shader + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p); + void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b); +//------------------------------------------------------------------------------------------------------------------------------ + // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store. + void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){ + #ifdef FFX_HLSL + // Invoke a slower path for DX only, since it won't allow uninitialized values. + pix0.a=pix1.a=0.0; + #endif + pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x); + pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasHx2( + // Output values are for 2 8x8 tiles in a 16x8 region. + // pix.x = left 8x8 tile + // pix.y = right 8x8 tile + // This enables later processing to easily be packed as well. + out FfxFloat16x2 pixR, + out FfxFloat16x2 pixG, + out FfxFloat16x2 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16x2 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // No scaling algorithm uses minimal 3x3 pixel neighborhood. + FfxInt16x2 sp0=FfxInt16x2(ip); + FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0); + FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a; + #else + FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb; + #endif + FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb; + FfxInt16x2 sp1=sp0+FfxInt16x2(8,0); + FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1); + FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a; + #else + FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb; + #endif + FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb; + // Arrays of Structures to Structures of Arrays conversion. + FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r); + FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g); + FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b); + FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r); + FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g); + FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b); + FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r); + FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g); + FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b); + FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r); + FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g); + FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b); + FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r); + FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g); + FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b); + // Run optional input transform. + FsrRcasInputHx2(bR,bG,bB); + FsrRcasInputHx2(dR,dG,dB); + FsrRcasInputHx2(eR,eG,eB); + FsrRcasInputHx2(fR,fG,fB); + FsrRcasInputHx2(hR,hG,hB); + // Luma times 2. + FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG); + FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG); + FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG); + FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG); + FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG); + // Noise detection. + FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0); + // Min and max of ring. + FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R); + FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G); + FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B); + FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); + FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); + FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y); + FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x); + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR +// +//------------------------------------------------------------------------------------------------------------------------------ +// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. +// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. +// The 'Lfga*()' functions provide a convenient way to introduce grain. +// These functions limit grain based on distance to signal limits. +// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. +// Grain application should be done in a linear colorspace. +// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). +//------------------------------------------------------------------------------------------------------------------------------ +// Usage, +// FsrLfga*( +// color, // In/out linear colorspace color {0 to 1} ranged. +// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. +// amount); // Amount of grain (0 to 1} ranged. +//------------------------------------------------------------------------------------------------------------------------------ +// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)' +//============================================================================================================================== +#if defined(FFX_GPU) + // Maximum grain is the minimum distance to the signal limit. + void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) + { + c += (t * ffxBroadcast3(a)) * ffxMin(ffxBroadcast3(1.0) - c, c); + } +#endif +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 + // Half precision version (slower). + void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) + { + c += (t * FFX_BROADCAST_FLOAT16X3(a)) * min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c); + } + //------------------------------------------------------------------------------------------------------------------------------ + // Packed half precision version (faster). + void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a){ + cR+=(tR*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cR,cR);cG+=(tG*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cG,cG);cB+=(tB*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cB,cB);} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER +// +//------------------------------------------------------------------------------------------------------------------------------ +// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. +// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. +//------------------------------------------------------------------------------------------------------------------------------ +// Reversible tonemapper usage, +// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. +// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. +//============================================================================================================================== +#if defined(FFX_GPU) + void FsrSrtmF(inout FfxFloat32x3 c) + { + c *= ffxBroadcast3(ffxReciprocal(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); + } + // The extra max solves the c=1.0 case (which is a /0). + void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(ffxReciprocal(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} +#endif +//============================================================================================================================== +#if defined(FFX_GPU )&& FFX_HALF == 1 + void FsrSrtmH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(ffxMax3Half(c.r, c.g, c.b) + FFX_BROADCAST_FLOAT16(1.0))); + } + void FsrSrtmInvH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b)))); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFX_BROADCAST_FLOAT16X2(1.0)); + cR *= rcp; + cG *= rcp; + cB *= rcp; + } + void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp=ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16X2(1.0/32768.0),FFX_BROADCAST_FLOAT16X2(1.0)-ffxMax3Half(cR,cG,cB))); + cR*=rcp; + cG*=rcp; + cB*=rcp; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(FFX_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this compddation. + // Output is {0 to <1}. + FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32 x = FfxFloat32(p.x + f); + FfxFloat32 y = FfxFloat32(p.y); + // The 1.61803 golden ratio. + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + // Number designed to provide a good visual pattern. + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * a + (y * b); + return ffxFract(x); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 8-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit) + { + FfxFloat32x3 n = ffxSqrt(c); + n = floor(n * ffxBroadcast3(255.0)) * ffxBroadcast3(1.0 / 255.0); + FfxFloat32x3 a = n * n; + FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 255.0); + b = b * b; + // Ratio of 'a' to 'b' required to produce 'c'. + // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges). + // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL. + FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); + // Use the ratio as a cutoff to choose 'a' or 'b'. + // ffxIsGreaterThanZero() is a MUL. + c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 10-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit) + { + FfxFloat32x3 n = ffxSqrt(c); + n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0); + FfxFloat32x3 a = n * n; + FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0); + b = b * b; + FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0)); + } +#endif +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 + FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32 x = FfxFloat32(p.x + f); + FfxFloat32 y = FfxFloat32(p.y); + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * a + (y * b); + return FfxFloat16(ffxFract(x)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit) + { + FfxFloat16x3 n = sqrt(c); + n = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); + FfxFloat16x3 a = n * n; + FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); + b = b * b; + FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit) + { + FfxFloat16x3 n = sqrt(c); + n = floor(n * FFX_BROADCAST_FLOAT16X3(1023.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); + FfxFloat16x3 a = n * n; + FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); + b = b * b; + FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0)); + } + //============================================================================================================================== + // This computes dither for positions 'p' and 'p+{8,0}'. + FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32x2 x; + x.x = FfxFloat32(p.x + f); + x.y = x.x + FfxFloat32(8.0); + FfxFloat32 y = FfxFloat32(p.y); + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b); + return FfxFloat16x2(ffxFract(x)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit) + { + FfxFloat16x2 nR = sqrt(cR); + FfxFloat16x2 nG = sqrt(cG); + FfxFloat16x2 nB = sqrt(cB); + nR = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + nG = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + nB = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + FfxFloat16x2 aR = nR * nR; + FfxFloat16x2 aG = nG * nG; + FfxFloat16x2 aB = nB * nB; + FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bR = bR * bR; + FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bG = bG * bG; + FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bB = bB * bB; + FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); + FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG); + FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB); + cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ + FfxFloat16x2 nR=sqrt(cR); + FfxFloat16x2 nG=sqrt(cG); + FfxFloat16x2 nB=sqrt(cB); + nR=floor(nR*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + nG=floor(nG*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + nB=floor(nB*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + FfxFloat16x2 aR=nR*nR; + FfxFloat16x2 aG=nG*nG; + FfxFloat16x2 aB=nB*nB; + FfxFloat16x2 bR=nR+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR; + FfxFloat16x2 bG=nG+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG; + FfxFloat16x2 bB=nB+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB; + FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); + FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG); + FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB); + cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0)); +} +#endif diff --git a/Shaders/shaders/fsr1/ffx_fsr1.h.meta b/Shaders/shaders/fsr1/ffx_fsr1.h.meta new file mode 100644 index 0000000..4a8bdf5 --- /dev/null +++ b/Shaders/shaders/fsr1/ffx_fsr1.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: e73be06d181f19f42a1e8f1cf6ebc92b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2.meta b/Shaders/shaders/fsr2.meta new file mode 100644 index 0000000..dcb75fc --- /dev/null +++ b/Shaders/shaders/fsr2.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 3aeed5c87f2e3e345adb5ef1906cd480 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h b/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h new file mode 100644 index 0000000..d87c748 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h @@ -0,0 +1,295 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_ACCUMULATE_H +#define FFX_FSR2_ACCUMULATE_H + +FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) +{ + return length(fMotionVector * DisplaySize()); +} +#if FFX_HALF +FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) +{ + return length(fMotionVector * FFX_MIN16_F2(DisplaySize())); +} +#endif + +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) +{ + // Avoid invalid values when accumulation and upsampled weight is 0 + fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz))); + fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor))); +#endif + + const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation; + fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha); + + fHistoryColor = YCoCgToRGB(fHistoryColor); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fHistoryColor = InverseTonemap(fHistoryColor); +#endif +} + +void RectifyHistory( + const AccumulationPassCommonParams params, + RectificationBox clippingBox, + FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, + FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation, + FfxFloat32 fLockContributionThisFrame, + FfxFloat32 fTemporalReactiveFactor, + FfxFloat32 fLumaInstabilityFactor) +{ + FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f)); + + const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); + const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); + FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT); + + FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; + FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec; + FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec; + FfxFloat32x3 boxCenter = clippingBox.boxCenter; + FfxFloat32 boxVecSize = length(clippingBox.boxVec); + + boxMin = ffxMax(clippingBox.aabbMin, boxMin); + boxMax = ffxMin(clippingBox.aabbMax, boxMax); + + if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) { + + const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax); + + FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx; + + const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor; + const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f); + fHistoryContribution *= fReactiveContribution; + + // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection + fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution)); + + // Scale accumulation using rectification info + const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)); + fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution)); + } +} + +void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(iPxHrPos, fUpscaledColor); +} + +void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight) +{ + // we expect similar motion for next frame + // kill lock if that location is outside screen, avoid locks to be clamped to screen borders + FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + if (IsUvInside(fEstimatedUvNextFrame) == false) { + KillLock(fLockStatus); + } + else { + // Decrease lock lifetime + const FfxFloat32 fLifetimeDecreaseLanczosMax = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame); + const FfxFloat32 fLifetimeDecrease = FfxFloat32(fUpsampledWeight / fLifetimeDecreaseLanczosMax); + fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + } + + StoreLockStatus(params.iPxHrPos, fLockStatus); +} + + +FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState) +{ + // Always assume max accumulation was reached + FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor); + + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10))))); + + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20)))); + + return fBaseAccumulation.xxx; +} + +FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff) +{ + const FfxFloat32 fUnormThreshold = 1.0f / 255.0f; + const FfxInt32 N_MINUS_1 = 0; + const FfxInt32 N_MINUS_2 = 1; + const FfxInt32 N_MINUS_3 = 2; + const FfxInt32 N_MINUS_4 = 3; + + FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x; + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma)); +#endif + + fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f; + + const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false); + FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f); + + FfxFloat32 fLumaInstability = 0.0f; + FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]); + + FfxFloat32 fMin = abs(fDiffs0); + + if (fMin >= fUnormThreshold) { + for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { + FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); + + if (sign(fDiffs0) == sign(fDiffs1)) { + + // Scale difference to protect historically similar values + const FfxFloat32 fMinBias = 1.0f; + fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias); + } + } + + const FfxFloat32 fBoxSize = clippingBox.boxVec.x; + const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f); + + fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor; + fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold); + + fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f)); + } + + //shift history + fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3]; + fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2]; + fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1]; + fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma; + + StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory); + + return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0); +} + +FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor) +{ + FfxFloat32 fNewFactor = ffxMin(0.99f, fTemporalReactiveFactor); + + fNewFactor = ffxMax(fNewFactor, ffxLerp(fNewFactor, 0.4f, ffxSaturate(params.fHrVelocity))); + + fNewFactor = ffxMax(fNewFactor * fNewFactor, ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor)); + + // Force reactive factor for new samples + fNewFactor = params.bIsNewSample ? 1.0f : fNewFactor; + + if (ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f) { + fNewFactor = ffxMax(FSR2_EPSILON, fNewFactor) * -1.0f; + } + + return fNewFactor; +} + +AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) +{ + AccumulationPassCommonParams params; + + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); + params.fHrUv = fHrUv; + + const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize()); + + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.fHrVelocity = GetPxHrVelocity(params.fMotionVector); + + ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); + + params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler)); + + const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fDilatedReactiveFactor = fDilatedReactiveMasks.x; + params.fAccumulationMask = fDilatedReactiveMasks.y; + params.bIsResetFrame = (0 == FrameIndex()); + + params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame); + + return params; +} + +void Accumulate(FfxInt32x2 iPxHrPos) +{ + const AccumulationPassCommonParams params = InitParams(iPxHrPos); + + FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0); + FfxFloat32x2 fLockStatus; + InitializeNewLockSample(fLockStatus); + + FfxFloat32 fTemporalReactiveFactor = 0.0f; + FfxBoolean bInMotionLastFrame = FFX_FALSE; + LockState lockState = { FFX_FALSE , FFX_FALSE }; + if (params.bIsExistingSample && !params.bIsResetFrame) { + ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame); + lockState = ReprojectHistoryLockStatus(params, fLockStatus); + } + + FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor); + + FfxFloat32 fLuminanceDiff = 0.0f; + FfxFloat32 fLockContributionThisFrame = 0.0f; + UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff); + + // Load upsampled input color + RectificationBox clippingBox; + FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor); + + const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff); + + + FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState); + + if (params.bIsNewSample) { + fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz); + } + else { + RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor); + + Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight); + } + + fHistoryColor = UnprepareRgb(fHistoryColor, Exposure()); + + FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w); + + // Get new temporal reactive factor + fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor); + + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor)); + + // Output final color when RCAS is disabled +#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0 + WriteUpscaledOutput(iPxHrPos, fHistoryColor); +#endif + StoreNewLocks(iPxHrPos, 0); +} + +#endif // FFX_FSR2_ACCUMULATE_H diff --git a/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h.meta new file mode 100644 index 0000000..7b61108 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_accumulate.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: ad56ff15a05a8c1479276e80fc3ea721 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h b/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h new file mode 100644 index 0000000..c52cc1a --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h @@ -0,0 +1,945 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "ffx_fsr2_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "../ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FSR2_BIND_CB_FSR2) + cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) + { + FfxInt32x2 iRenderSize; + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iDisplaySize; + FfxInt32x2 iInputColorResourceDimensions; + FfxInt32x2 iLumaMipDimensions; + FfxInt32 iLumaMipLevelToUse; + FfxInt32 iFrameIndex; + + FfxFloat32x4 fDeviceToViewDepth; + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fPreExposure; + FfxFloat32 fPreviousFramePreExposure; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + FfxFloat32 fDeltaTime; + FfxFloat32 fDynamicResChangeFactor; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fPadding; + }; + +#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 32 + +/* Define getter functions in the order they are defined in the CB! */ +FfxInt32x2 RenderSize() +{ + return iRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return iMaxRenderSize; +} + +FfxInt32x2 DisplaySize() +{ + return iDisplaySize; +} + +FfxInt32x2 InputColorResourceDimensions() +{ + return iInputColorResourceDimensions; +} + +FfxInt32x2 LumaMipDimensions() +{ + return iLumaMipDimensions; +} + +FfxInt32 LumaMipLevelToUse() +{ + return iLumaMipLevelToUse; +} + +FfxInt32 FrameIndex() +{ + return iFrameIndex; +} + +FfxFloat32x2 Jitter() +{ + return fJitter; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return fDeviceToViewDepth; +} + +FfxFloat32x2 MotionVectorScale() +{ + return fMotionVectorScale; +} + +FfxFloat32x2 DownscaleFactor() +{ + return fDownscaleFactor; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return fMotionVectorJitterCancellation; +} + +FfxFloat32 PreExposure() +{ + return fPreExposure; +} + +FfxFloat32 PreviousFramePreExposure() +{ + return fPreviousFramePreExposure; +} + +FfxFloat32 TanHalfFoV() +{ + return fTanHalfFOV; +} + +FfxFloat32 JitterSequenceLength() +{ + return fJitterSequenceLength; +} + +FfxFloat32 DeltaTime() +{ + return fDeltaTime; +} + +FfxFloat32 DynamicResChangeFactor() +{ + return fDynamicResChangeFactor; +} + +FfxFloat32 ViewSpaceToMetersFactor() +{ + return fViewSpaceToMetersFactor; +} +#endif // #if defined(FSR2_BIND_CB_FSR2) + +#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p) +#define FFX_FSR2_ROOTSIG_STR(p) #p +#define FFX_FSR2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. + +#define FFX_FSR2_CB2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR2_CONSTANT_BUFFER_3_SIZE 4 // Number of 32-bit values. This must be kept in sync with cbGenerateReactive size. + +#define FFX_FSR2_REACTIVE_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#if defined(FFX_FSR2_EMBED_ROOTSIG) +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG +#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT FFX_FSR2_REACTIVE_ROOTSIG +#else +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT +#endif // #if FFX_FSR2_EMBED_ROOTSIG + +#if defined(FSR2_BIND_CB_AUTOREACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_AUTOREACTIVE) +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +}; + +FfxFloat32 TcThreshold() +{ + return fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return fReactiveMax; +} +#endif // #if defined(FSR2_BIND_CB_AUTOREACTIVE) + +#if defined(FSR2_BIND_CB_RCAS) +cbuffer cbRCAS : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) +{ + FfxUInt32x4 rcasConfig; +}; + +FfxUInt32x4 RCASConfig() +{ + return rcasConfig; +} +#endif // #if defined(FSR2_BIND_CB_RCAS) + + +#if defined(FSR2_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +}; + +FfxFloat32 GenReactiveScale() +{ + return gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return gen_reactive_flags; +} +#endif // #if defined(FSR2_BIND_CB_REACTIVE) + +#if defined(FSR2_BIND_CB_SPD) +cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +}; + +FfxUInt32 MipCount() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return renderSize; +} +#endif // #if defined(FSR2_BIND_CB_SPD) + +// Declare and sample camera buffers as regular textures, unless overridden +#if !defined(UNITY_FSR_TEX2D) +#define UNITY_FSR_TEX2D(type) Texture2D +#endif +#if !defined(UNITY_FSR_RWTEX2D) +#define UNITY_FSR_RWTEX2D(type) RWTexture2D +#endif +#if !defined(UNITY_FSR_POS) +#define UNITY_FSR_POS(pxPos) (pxPos) +#endif +#if !defined(UNITY_FSR_UV) +#define UNITY_FSR_UV(uv) (uv) +#endif + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + + // SRVs + #if defined FSR2_BIND_SRV_INPUT_COLOR + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + #endif + #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY); + #endif + #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS + UNITY_FSR_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_INPUT_DEPTH + UNITY_FSR_TEX2D(FfxFloat32) r_input_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH); + #endif + #if defined FSR2_BIND_SRV_INPUT_EXPOSURE + Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_AUTO_EXPOSURE + Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE); + #endif + #if defined FSR2_BIND_SRV_REACTIVE_MASK + UNITY_FSR_TEX2D(FfxFloat32) r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + #endif + #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK + UNITY_FSR_TEX2D(FfxFloat32) r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + #endif + #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH + Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS + Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_SRV_DILATED_DEPTH + Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + #endif + #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + #endif + #if defined FSR2_BIND_SRV_LOCK_STATUS + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + #endif + #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA + Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_SRV_NEW_LOCKS + Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS); + #endif + #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + #endif + #if defined FSR2_BIND_SRV_LUMA_HISTORY + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + #endif + #if defined FSR2_BIND_SRV_RCAS_INPUT + Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + #endif + #if defined FSR2_BIND_SRV_LANCZOS_LUT + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + #endif + #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS); + #endif + #if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + #endif + #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS + Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS); + #endif + + #if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR + Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR + Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + #endif + + // UAV declarations + #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH + RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + #endif + #if defined FSR2_BIND_UAV_DILATED_DEPTH + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + #endif + #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + #endif + #if defined FSR2_BIND_UAV_LOCK_STATUS + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + #endif + #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA + RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA); + #endif + #if defined FSR2_BIND_UAV_NEW_LOCKS + RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS); + #endif + #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + #endif + #if defined FSR2_BIND_UAV_LUMA_HISTORY + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + #endif + #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT + UNITY_FSR_RWTEX2D(FfxFloat32x4) rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + #endif + #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS + RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS); + #endif + #if defined FSR2_BIND_UAV_EXPOSURE + RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_AUTO_EXPOSURE + RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE); + #endif + #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + #endif + + #if defined FSR2_BIND_UAV_AUTOREACTIVE + RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE); + #endif + #if defined FSR2_BIND_UAV_AUTOCOMPOSITION + RWTexture2D rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION); + #endif + #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR + RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR); + #endif + #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR + RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR); + #endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) +FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) +{ + return r_imgMips.mips[mipLevel][iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) +FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) +FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) +{ + return r_input_depth[UNITY_FSR_POS(iPxPos)]; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x; +} +#endif + +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) +FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) +{ + return r_reactive_mask[UNITY_FSR_POS(iPxPos)]; +} +#endif + +#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) +{ + return r_transparency_and_composition_mask[UNITY_FSR_POS(iPxPos)]; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_COLOR) +FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) +{ + return r_input_color_jittered[UNITY_FSR_POS(iPxPos)].rgb; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_COLOR) +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).rgb; +} +#endif + +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) +FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) +{ + return r_prepared_input_color[iPxPos].xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) +FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[UNITY_FSR_POS(iPxDilatedMotionVectorPos)].xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} +#endif + +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) +FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) +{ + return r_internal_upscaled_color[iPxHistory]; +} +#endif + +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) +void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) +{ + rw_luma_history[iPxPos] = fLumaHistory; +} +#endif + +#if defined(FSR2_BIND_SRV_LUMA_HISTORY) +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_RCAS_INPUT) + return r_rcas_input[iPxPos]; +#else + return 0.0; +#endif +} + +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) +void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + rw_internal_upscaled_color[iPxHistory] = fHistory; +} +#endif + +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) +void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + rw_internal_upscaled_color[iPxPos] = fColorAndWeight; +} +#endif + +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) +void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + rw_upscaled_output[UNITY_FSR_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); +} +#endif + +//LOCK_LIFETIME_REMAINING == 0 +//Should make LockInitialLifetime() return a const 1.0f later +#if defined(FSR2_BIND_SRV_LOCK_STATUS) +FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) +{ + return r_lock_status[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_LOCK_STATUS) +void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) +{ + rw_lock_status[iPxPos] = fLockStatus; +} +#endif + +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) +FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) +{ + return r_lock_input_luma[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) +void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) +{ + rw_lock_input_luma[iPxPos] = fLuma; +} +#endif + +#if defined(FSR2_BIND_SRV_NEW_LOCKS) +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) +{ + return r_new_locks[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) +{ + return rw_new_locks[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_NEW_LOCKS) +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) +{ + rw_new_locks[iPxPos] = newLock; +} +#endif + +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) +void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) +{ + rw_prepared_input_color[iPxPos] = fTonemapped; +} +#endif + +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +{ + return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; +} +#endif + +#if defined(FSR2_BIND_SRV_LOCK_STATUS) +FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) +{ + FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); + return fLockStatus; +} +#endif + +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) +{ + return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); +} +#endif + +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = asuint(fDepth); + + #if FFX_FSR2_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); + #else + InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth + #endif +} +#endif + +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) +{ + rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; +} +#endif + +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) +void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ + rw_dilatedDepth[iPxPos] = fDepth; +} +#endif + +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) +void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ + rw_dilated_motion_vectors[iPxPos] = fMotionVector; +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) +FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_dilated_motion_vectors[iPxInput].xy; +} +#endif + +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) +FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_previous_dilated_motion_vectors[iPxInput].xy; +} + +FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) +{ + return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_DEPTH) +FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) +{ + return r_dilatedDepth[iPxInput]; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) +FfxFloat32 AutoExposure() +{ + FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} + +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) +FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) +{ + // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. + return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) +FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) +FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) +{ + return r_dilated_reactive_masks[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) +void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) +{ + rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; +} +#endif + +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[UNITY_FSR_POS(iPxPos)].xyz; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_pre_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_post_alpha[iPxPos]; +} +#endif + +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + rw_output_autoreactive[iPxPos] = fReactive.x; + + rw_output_autocomposition[iPxPos] = fReactive.y; +} +#endif +#endif + +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_pre_alpha[iPxPos] = color; + +} +#endif + +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_post_alpha[iPxPos] = color; +} +#endif + +FfxFloat32x2 SPD_LoadExposureBuffer() +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + return rw_auto_exposure[FfxInt32x2(0, 0)]; +#else + return FfxFloat32x2(0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +void SPD_SetExposureBuffer(FfxFloat32x2 value) +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + rw_auto_exposure[FfxInt32x2(0, 0)] = value; +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) +{ +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); +#else + return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 +} + +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) +{ + switch (slice) + { + case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + rw_img_mip_shading_change[iPxPos] = value; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + break; + case 5: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + rw_img_mip_5[iPxPos] = value; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + break; + default: + + // avoid flattened side effect +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) + rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; +#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) + rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos]; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + break; + } +} + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + +void SPD_ResetAtomicCounter() +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + +#endif // #if defined(FFX_GPU) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h.meta new file mode 100644 index 0000000..f044e59 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: ee531e22337dd714aab845c65c30ab94 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_common.h b/Shaders/shaders/fsr2/ffx_fsr2_common.h new file mode 100644 index 0000000..e46b66c --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_common.h @@ -0,0 +1,566 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if !defined(FFX_FSR2_COMMON_H) +#define FFX_FSR2_COMMON_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +//Locks +#define LOCK_LIFETIME_REMAINING 0 +#define LOCK_TEMPORAL_LUMA 1 +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#if defined(FFX_GPU) +FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; +FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; +FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; +FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; + +// treat vector truncation warnings as errors +#pragma warning(error: 3206) + +// suppress warnings +#pragma warning(disable: 3205) // conversion from larger type to smaller +#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative + +// Reconstructed depth usage +FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f; + +// Accumulation +FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f; +FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f; +FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; + +// Auto exposure +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; + +struct AccumulationPassCommonParams +{ + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 fHrVelocity; + FfxFloat32 fDepthClipFactor; + FfxFloat32 fDilatedReactiveFactor; + FfxFloat32 fAccumulationMask; + + FfxBoolean bIsResetFrame; + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; +}; + +struct LockState +{ + FfxBoolean NewLock; //Set for both unique new and re-locked new + FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) +}; + +void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) +{ + fLockStatus = FfxFloat32x2(0, 0); +} + +#if FFX_HALF +void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) +{ + fLockStatus = FFX_MIN16_F2(0, 0); +} +#endif + + +void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) +{ + fLockStatus[LOCK_LIFETIME_REMAINING] = 0; +} + +#if FFX_HALF +void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) +{ + fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); +} +#endif + +struct RectificationBox +{ + FfxFloat32x3 boxCenter; + FfxFloat32x3 boxVec; + FfxFloat32x3 aabbMin; + FfxFloat32x3 aabbMax; + FfxFloat32 fBoxCenterWeight; +}; +#if FFX_HALF +struct RectificationBoxMin16 +{ + FFX_MIN16_F3 boxCenter; + FFX_MIN16_F3 boxVec; + FFX_MIN16_F3 aabbMin; + FFX_MIN16_F3 aabbMax; + FFX_MIN16_F fBoxCenterWeight; +}; +#endif + +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = FfxFloat32(0); + + rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0); + rectificationBox.boxVec = FfxFloat32x3(0, 0, 0); + rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); + rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX); +} +#if FFX_HALF +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) +{ + rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0); + + rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0); + rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0); + rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); + rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX); +} +#endif + +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } +} +#if FFX_HALF +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +{ + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FFX_MIN16_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } +} +#endif + +void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; +} +#if FFX_HALF +void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f)); + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; +} +#endif + +FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) +{ + return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); +} +#if FFX_HALF +FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v) +{ + return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0); +} +#endif + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) +{ + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : 0; +} + +#if FFX_HALF +FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1) +{ + const FFX_MIN16_F m = ffxMax(v0, v1); + return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0); +} +#endif + +FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) +{ + FfxFloat32x3 fRgb; + + fRgb = FfxFloat32x3( + fYCoCg.x + fYCoCg.y - fYCoCg.z, + fYCoCg.x + fYCoCg.z, + fYCoCg.x - fYCoCg.y - fYCoCg.z); + + return fRgb; +} +#if FFX_HALF +FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) +{ + FFX_MIN16_F3 fRgb; + + fRgb = FFX_MIN16_F3( + fYCoCg.x + fYCoCg.y - fYCoCg.z, + fYCoCg.x + fYCoCg.z, + fYCoCg.x - fYCoCg.y - fYCoCg.z); + + return fRgb; +} +#endif + +FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) +{ + FfxFloat32x3 fYCoCg; + + fYCoCg = FfxFloat32x3( + 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, + 0.5f * fRgb.r - 0.5f * fRgb.b, + -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); + + return fYCoCg; +} +#if FFX_HALF +FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) +{ + FFX_MIN16_F3 fYCoCg; + + fYCoCg = FFX_MIN16_F3( + 0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b, + 0.5 * fRgb.r - 0.5 * fRgb.b, + -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b); + + return fYCoCg; +} +#endif + +FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) +{ + return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); +} +#if FFX_HALF +FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb) +{ + return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f)); +} +#endif + +FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) +{ + FfxFloat32 fLuminance = RGBToLuma(fLinearRgb); + + FfxFloat32 fPercievedLuminance = 0; + if (fLuminance <= 216.0f / 24389.0f) { + fPercievedLuminance = fLuminance * (24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; + } + + return fPercievedLuminance * 0.01f; +} +#if FFX_HALF +FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb) +{ + FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb); + + FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0); + if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) { + fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f); + } + + return fPercievedLuminance * FFX_MIN16_F(0.01f); +} +#endif + +FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; +} + +FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) +{ + return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} + +#if FFX_HALF +FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx; +} + +FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) +{ + return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} +#endif + +FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; + + // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); +} +#if FFX_HALF +FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) +{ + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; + return result; + + // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); +} +#endif + +FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) +{ + const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; + const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); + const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); + + return fClampedUv; +} + +FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) +{ + return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); +} +#if FFX_HALF +FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) +{ + return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size))); +} +#endif + +FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) +{ + Lavg = exp(Lavg); + + const FfxFloat32 S = 100.0f; //ISO arithmetic speed + const FfxFloat32 K = 12.5f; + FfxFloat32 ExposureISO100 = log2((Lavg * S) / K); + + const FfxFloat32 q = 0.65f; + FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); + + return 1 / Lmax; +} +#if FFX_HALF +FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg) +{ + Lavg = exp(Lavg); + + const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed + const FFX_MIN16_F K = FFX_MIN16_F(12.5f); + const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K); + + const FFX_MIN16_F q = FFX_MIN16_F(0.65f); + const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100); + + return FFX_MIN16_F(1) / Lmax; +} +#endif + +FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) +{ + FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); + FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); + return iPxHrPos; +} +#if FFX_HALF +FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos) +{ + FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter()); + FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize()); + FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr)); + return iPxHrPos; +} +#endif + +FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) +{ + return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); +} + +FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + // fDeviceToViewDepth details found in ffx_fsr2.cpp + return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); +} + +FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) +{ + return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); + + const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); + const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; + const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; + + return FfxFloat32x3(X, Y, Z); +} + +FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32 GetMaxDistanceInMeters() +{ +#if FFX_FSR2_OPTION_INVERTED_DEPTH + return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); +#else + return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); +#endif +} + +FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + fRgb /= fPreExposure; + fRgb *= fExposure; + + fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX); + + return fRgb; +} + +FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) +{ + fRgb /= fExposure; + fRgb *= PreExposure(); + + return fRgb; +} + + +struct BilinearSamplingData +{ + FfxInt32x2 iOffsets[4]; + FfxFloat32 fWeights[4]; + FfxInt32x2 iBasePos; +}; + +BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) +{ + BilinearSamplingData data; + + FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); + data.iBasePos = FfxInt32x2(floor(fPxSample)); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + data.iOffsets[0] = FfxInt32x2(0, 0); + data.iOffsets[1] = FfxInt32x2(1, 0); + data.iOffsets[2] = FfxInt32x2(0, 1); + data.iOffsets[3] = FfxInt32x2(1, 1); + + data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); + data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); + + return data; +} + +struct PlaneData +{ + FfxFloat32x3 fNormal; + FfxFloat32 fDistanceFromOrigin; +}; + +PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) +{ + PlaneData plane; + + FfxFloat32x3 v0 = fP0 - fP1; + FfxFloat32x3 v1 = fP0 - fP2; + plane.fNormal = normalize(cross(v0, v1)); + plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); + + return plane; +} + +FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) +{ + return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); +} + +#endif // #if defined(FFX_GPU) + +#endif //!defined(FFX_FSR2_COMMON_H) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_common.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_common.h.meta new file mode 100644 index 0000000..1357ddf --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_common.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 261b5f3ac23701a4aa6d399d43798324 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h b/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h new file mode 100644 index 0000000..d8d4820 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h @@ -0,0 +1,176 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) +{ + FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize(); + fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions()); + FfxFloat32x3 fRgb = SampleInputColor(fUv); + + fRgb /= PreExposure(); + + //compute log luma + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb))); + + // Make sure out of screen pixels contribute no value to the end result + const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; + + return FfxFloat32x4(result, 0, 0, 0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return SPD_LoadMipmap5(tex); +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + SPD_SetMipmap(pix, index, outValue.r); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) + { + FfxFloat32 prev = SPD_LoadExposureBuffer().y; + FfxFloat32 result = outValue.r; + + if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + FfxFloat32 rate = 1.0f; + result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + } + FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); + SPD_SetExposureBuffer(spdOutput); + } + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} +#endif + +// define fetch and store functions Packed +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "../spd/ffx_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h.meta new file mode 100644 index 0000000..1ea247c --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 86bf712594bf26449924ac6bb393e498 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h b/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h new file mode 100644 index 0000000..873ff4b --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h @@ -0,0 +1,259 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_DEPTH_CLIP_H +#define FFX_FSR2_DEPTH_CLIP_H + +FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; + +FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample) +{ + FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample); + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize()); + + FfxFloat32 fDilatedSum = 0.0f; + FfxFloat32 fDepth = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; + + if (IsOnScreen(iSamplePos, RenderSize())) { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos); + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample); + + const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + + if (fDepthDiff > 0.0f) { + +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample); +#else + const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample); +#endif + + const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth); + const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth); + + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize())); + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 Kfov = length(fCorner) / length(fCenter); + const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold; + + const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f))); + const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor); + fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f; +} + +FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize) +{ + FfxFloat32 minconvergence = 1.0f; + + FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos); + FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize()); + FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus); + + const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f; + + if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) { + for (FfxInt32 y = -1; y <= 1; ++y) { + for (FfxInt32 x = -1; x <= 1; ++x) { + + FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp); + FfxFloat32 fVelocityUv = length(fMotionVector); + + fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv); + minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv)); + } + } + } + + return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f); +} + +FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters(); + FfxFloat32 fDepthMax = 0.0f; + FfxFloat32 fDepthMin = fMaxDistInMeters; + + FfxInt32 iMaxDistFound = 0; + + for (FfxInt32 y = -1; y < 2; y++) { + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 iOffset = FfxInt32x2(x, y); + const FfxInt32x2 iSamplePos = iPxPos + iOffset; + + const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f; + FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor; + + iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth); + + fDepthMin = ffxMin(fDepthMin, fDepth); + fDepthMax = ffxMax(fDepthMax, fDepth); + } + } + + return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f); +} + +FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos) +{ + const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize(); + + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + fReprojectedUv = ClampUv(fReprojectedUv, RenderSize(), MaxRenderSize()); + FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv); + + float fPxDistance = length(fMotionVector * DisplaySize()); + return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f))) : 0; +} + +void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence) +{ + // Compensate for bilinear sampling in accumulation pass + + FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz; + FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence); + + float fMasksSum = 0.0f; + + FfxFloat32x3 fColorSamples[9]; + FfxFloat32 fReactiveSamples[9]; + FfxFloat32 fTransparencyAndCompositionSamples[9]; + + FFX_UNROLL + for (FfxInt32 y = -1; y < 2; y++) { + FFX_UNROLL + for (FfxInt32 x = -1; x < 2; x++) { + + const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + + FfxInt32 sampleIdx = (y + 1) * 3 + x + 1; + + FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz; + FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord); + FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord); + + fColorSamples[sampleIdx] = fColorSample; + fReactiveSamples[sampleIdx] = fReactiveSample; + fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample; + + fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample); + } + } + + if (fMasksSum > 0) + { + for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++) + { + FfxFloat32x3 fColorSample = fColorSamples[sampleIdx]; + FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx]; + FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx]; + + const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample)); + const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq; + + // Increase power for non-similar samples + const FfxFloat32 fPowerBiasMax = 6.0f; + const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax); + const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower); + const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower); + + fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample)); + } + } + + StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor); +} + +FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + fRgb = PrepareRgb(fRgb, Exposure(), PreExposure()); + + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); + + return fPreparedYCoCg; +} + +FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 d0 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1))); + FfxFloat32 d1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0))); + FfxFloat32 d2 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1))); + + return 1.0f - FfxFloat32(((d0 - d1) > (d1 * 0.01f)) && ((d1 - d2) > (d2 * 0.01f))); +} + +void DepthClip(FfxInt32x2 iPxPos) +{ + FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize(); + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + + // Discard tiny mvs + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f); + + const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos)); + + // Compute prepared input color and depth clip + FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector); + FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos); + StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip)); + + // Compute dilated reactive mask +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxPos; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); +#endif + + FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize()); + FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos)); + + PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); +} + +#endif //!defined( FFX_FSR2_DEPTH_CLIPH ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h.meta new file mode 100644 index 0000000..49ed527 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_depth_clip.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 37ba9aabf1f29eb4abc9b37df9829fde +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_lock.h b/Shaders/shaders/fsr2/ffx_fsr2_lock.h new file mode 100644 index 0000000..4a1f6d5 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_lock.h @@ -0,0 +1,116 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_LOCK_H +#define FFX_FSR2_LOCK_H + +void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos) +{ + if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) + { +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxUInt32 farZ = 0x0; +#else + const FfxUInt32 farZ = 0x3f800000; +#endif + SetReconstructedDepth(iPxHrPos, farZ); + } +} + +FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos) +{ + const FfxInt32 RADIUS = 1; + + FfxFloat32 fNucleus = LoadLockInputLuma(pos); + + FfxFloat32 similar_threshold = 1.05f; + FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX; + FfxFloat32 dissimilarLumaMax = 0; + + /* + 0 1 2 + 3 4 5 + 6 7 8 + */ + + #define SETBIT(x) (1U << x) + + FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar + + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { + SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left + SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right + SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left + SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right + }; + + FfxInt32 idx = 0; + FFX_UNROLL + for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) { + FFX_UNROLL + for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) { + if (x == 0 && y == 0) continue; + + FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + + FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos); + FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); + + if (difference > 0 && (difference < similar_threshold)) { + mask |= SETBIT(idx); + } else { + dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma); + dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma); + } + } + } + + FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin; + + if (FFX_FALSE == isRidge) { + + return false; + } + + FFX_UNROLL + for (FfxInt32 i = 0; i < 4; i++) { + + if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) { + return false; + } + } + + return true; +} + +void ComputeLock(FfxInt32x2 iPxLrPos) +{ + if (ComputeThinFeatureConfidence(iPxLrPos)) + { + StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f); + } + + // ClearResourcesForNextFrame(iPxLrPos); +} + +#endif // FFX_FSR2_LOCK_H diff --git a/Shaders/shaders/fsr2/ffx_fsr2_lock.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_lock.h.meta new file mode 100644 index 0000000..bf702fd --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_lock.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 77b94974e206e5045afda96cf9048139 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h b/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h new file mode 100644 index 0000000..90ef344 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h @@ -0,0 +1,107 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H +#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H + +FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) +{ + return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); +} + +#if FFX_HALF +FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) +{ + return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); +} +#endif + +#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF +DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +#else +DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +#endif +DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) + +FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord) +{ + FfxFloat32 fShadingChangeLuma = 0; + +#if 0 + fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x); +#else + + const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse()); + FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv); + + fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions()); + fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse()))); +#endif + + fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f); + + return fShadingChangeLuma; +} + +void UpdateLockStatus(AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, + FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, + FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, + FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { + + const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv); + + //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? + fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; + + FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; + + fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); + + if (state.NewLock) { + fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma; + + fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f; + } + else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) { + fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f); + } + else { + if (fLuminanceDiff > 0.1f) { + KillLock(fLockStatus); + } + } + + fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f)); + fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor); + + fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask); + fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f); + + // Compute this frame lock contribution + const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f); + const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma)); + + fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution); +} + +#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h.meta new file mode 100644 index 0000000..f92a8d3 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_postprocess_lock_status.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 04dd68d20ea907f4381636f90aae5bc9 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_rcas.h b/Shaders/shaders/fsr2/ffx_fsr2_rcas.h new file mode 100644 index 0000000..fd5fd26 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_rcas.h @@ -0,0 +1,67 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); +} + +#define FSR_RCAS_F 1 +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) +{ + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + + return fColor; +} +void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + +#include "../fsr1/ffx_fsr1.h" + +void CurrFilter(FFX_MIN16_U2 pos) +{ + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c = UnprepareRgb(c, Exposure()); + + WriteUpscaledOutput(pos, c); +} + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +} diff --git a/Shaders/shaders/fsr2/ffx_fsr2_rcas.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_rcas.h.meta new file mode 100644 index 0000000..c121733 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_rcas.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 1b6b9dc81790da347b42bf6a3a132479 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h new file mode 100644 index 0000000..1a4305d --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -0,0 +1,146 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H +#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H + +void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize) +{ + fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); + + FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + // Project current depth into previous frame locations. + // Push to all pixels having some contribution if reprojection is using bilinear logic. + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, iPxDepthSize)) { + StoreReconstructedDepth(iStorePos, fDepth); + } + } + } +} + +void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord) +{ + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(+0, +1), + FfxInt32x2(+0, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+1, +1), + FfxInt32x2(-1, -1), + FfxInt32x2(+1, -1), + }; + + // pull out the depth loads to allow SC to batch them + FfxFloat32 depth[9]; + FfxInt32 iSampleIndex = 0; + FFX_UNROLL + for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + depth[iSampleIndex] = LoadInputDepth(iPos); + } + + // find closest depth + fNearestDepthCoord = iPxPos; + fNearestDepth = depth[0]; + FFX_UNROLL + for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + if (IsOnScreen(iPos, iPxSize)) { + + FfxFloat32 fNdDepth = depth[iSampleIndex]; +#if FFX_FSR2_OPTION_INVERTED_DEPTH + if (fNdDepth > fNearestDepth) { +#else + if (fNdDepth < fNearestDepth) { +#endif + fNearestDepthCoord = iPos; + fNearestDepth = fNdDepth; + } + } + } +} + +FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); + + // Use internal auto exposure for locking logic + fRgb /= PreExposure(); + fRgb *= Exposure(); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fRgb = Tonemap(fRgb); +#endif + + //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! + const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); + + return fLockInputLuma; +} + +void ReconstructAndDilate(FfxInt32x2 iPxLrPos) +{ + FfxFloat32 fDilatedDepth; + FfxInt32x2 iNearestDepthCoord; + + FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord); + +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxInt32x2 iSamplePos = iPxLrPos; + FfxInt32x2 iMotionVectorPos = iNearestDepthCoord; +#else + FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos); + FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord); +#endif + + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); + + StoreDilatedDepth(iPxLrPos, fDilatedDepth); + StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector); + + ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize()); + + FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos); + StoreLockInputLuma(iPxLrPos, fLockInputLuma); +} + + +#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta new file mode 100644 index 0000000..3d37e71 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: c7d2ce16ffba7e5459f6e88351b86355 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_reproject.h b/Shaders/shaders/fsr2/ffx_fsr2_reproject.h new file mode 100644 index 0000000..386b297 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_reproject.h @@ -0,0 +1,137 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_REPROJECT_H +#define FFX_FSR2_REPROJECT_H + +#ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference +#endif + +FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) +{ + return LoadHistory(iPxSample); +} + +#if FFX_HALF +FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample) +{ + return FFX_MIN16_F4(LoadHistory(iPxSample)); +} +#endif + + +#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF +DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory) +DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) +#else +DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) +DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) +#endif + +FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) +{ + FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f); + return fSample; +} + +#if FFX_HALF +FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample) +{ + FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0); + + return fSample; +} +#endif + +#if 1 +#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF +DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples) +#else +DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples) +#endif +#else +#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF +DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) +#else +DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples) +#endif +#endif + +FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) +{ +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); +#else + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); +#endif + + return fDilatedMotionVector; +} + +FfxBoolean IsUvInside(FfxFloat32x2 fUv) +{ + return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); +} + +void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +{ + fReprojectedHrUv = params.fHrUv + params.fMotionVector; + + bIsExistingSample = IsUvInside(fReprojectedHrUv); +} + +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame) +{ + FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize()); + + fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); + + fHistoryColor = RGBToYCoCg(fHistoryColor); + + //Compute temporal reactivity info + fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w)); + bInMotionLastFrame = (fHistory.w < 0.0f); +} + +LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus) +{ + LockState state = { FFX_FALSE, FFX_FALSE }; + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos); + state.NewLock = fNewLockIntensity > (127.0f / 255.0f); + + FfxFloat32 fInPlaceLockLifetime = state.NewLock ? fNewLockIntensity : 0; + + fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv); + + if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) { + state.WasLockedPrevFrame = true; + } + + return state; +} + +#endif //!defined( FFX_FSR2_REPROJECT_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_reproject.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_reproject.h.meta new file mode 100644 index 0000000..d99b60e --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_reproject.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: c888fea71de852c468ab21c525ce2320 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_resources.h b/Shaders/shaders/fsr2/ffx_fsr2_resources.h new file mode 100644 index 0000000..a597c5a --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_resources.h @@ -0,0 +1,106 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_RESOURCES_H +#define FFX_FSR2_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 12 +#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 14 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 15 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 16 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 17 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 18 +#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 19 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 20 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 21 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 22 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 23 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 24 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 25 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 26 +#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 27 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 28 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE 29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 29 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1 30 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2 31 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3 32 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 33 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5 34 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6 35 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7 36 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8 37 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9 38 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10 39 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11 40 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12 41 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 42 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 43 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE 44 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION 45 + +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR 46 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR 47 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 48 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 49 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 50 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 51 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS 52 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 53 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 54 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 55 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 56 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA 57 + +// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) + +#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 58 + +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 + +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 +#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR2_RESOURCES_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_resources.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_resources.h.meta new file mode 100644 index 0000000..4269f87 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_resources.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: af595ef438ac2b4469f50e6f92485a6c +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_sample.h b/Shaders/shaders/fsr2/ffx_fsr2_sample.h new file mode 100644 index 0000000..b75f090 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_sample.h @@ -0,0 +1,606 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_SAMPLE_H +#define FFX_FSR2_SAMPLE_H + +// suppress warnings +#ifdef FFX_HLSL +#pragma warning(disable: 4008) // potentially divide by zero +#endif //FFX_HLSL + +struct FetchedBilinearSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; +}; + +struct FetchedBicubicSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + FfxFloat32x4 fColor20; + FfxFloat32x4 fColor30; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; + FfxFloat32x4 fColor21; + FfxFloat32x4 fColor31; + + FfxFloat32x4 fColor02; + FfxFloat32x4 fColor12; + FfxFloat32x4 fColor22; + FfxFloat32x4 fColor32; + + FfxFloat32x4 fColor03; + FfxFloat32x4 fColor13; + FfxFloat32x4 fColor23; + FfxFloat32x4 fColor33; +}; + +#if FFX_HALF +struct FetchedBilinearSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; +}; + +struct FetchedBicubicSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + FFX_MIN16_F4 fColor20; + FFX_MIN16_F4 fColor30; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; + FFX_MIN16_F4 fColor21; + FFX_MIN16_F4 fColor31; + + FFX_MIN16_F4 fColor02; + FFX_MIN16_F4 fColor12; + FFX_MIN16_F4 fColor22; + FFX_MIN16_F4 fColor32; + + FFX_MIN16_F4 fColor03; + FFX_MIN16_F4 fColor13; + FFX_MIN16_F4 fColor23; + FFX_MIN16_F4 fColor33; +}; +#else //FFX_HALF +#define FetchedBicubicSamplesMin16 FetchedBicubicSamples +#define FetchedBilinearSamplesMin16 FetchedBilinearSamples +#endif //FFX_HALF + +FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) +{ + return A + (B - A) * t; +} + +FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) +{ + return A + (B - A) * t; +} + +FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} +#endif + +FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) +{ + const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants + return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); +} + +FfxFloat32 Lanczos2(FfxFloat32 x) +{ + x = ffxMin(abs(x), 2.0f); + return Lanczos2NoClamp(x); +} + +#if FFX_HALF + +#if 0 +FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) +{ + const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants + return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); +} +#endif + +FFX_MIN16_F Lanczos2(FFX_MIN16_F x) +{ + x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); + return FFX_MIN16_F(Lanczos2NoClamp(x)); +} +#endif //FFX_HALF + +// FSR1 lanczos approximation. Input is x*x and must be <= 4. +FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) +{ + FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; + FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; + return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) +{ + FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1); + FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); + return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) +{ + x2 = ffxMin(x2, 4.0f); + return Lanczos2ApproxSqNoClamp(x2); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); + return Lanczos2ApproxSqNoClamp(x2); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2Approx(FfxFloat32 x) +{ + return Lanczos2ApproxSq(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) +{ + return Lanczos2ApproxSq(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) +{ + return SampleLanczos2Weight(abs(x)); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) +{ + return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#if FFX_HALF +FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif + +FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + +FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + + +FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif + +// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. +FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; +} +#if FFX_HALF +FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) +{ + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; + result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; + result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; + result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; + return result; +} +#endif //FFX_HALF + + +#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ + Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ + Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ + \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ + \ + Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ + Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ + \ + Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ + Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ + Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ + Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ + \ + return Samples; \ + } + +#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + return Samples; \ + } + +#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) +// is common, so iPxSample can "jitter" +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ + FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define FFX_FSR2_CONCAT_ID(x, y) x ## y +#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y) +#define FFX_FSR2_SAMPLER_1D_0 Lanczos2 +#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT +#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx + +#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x) + +#endif //!defined( FFX_FSR2_SAMPLE_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_sample.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_sample.h.meta new file mode 100644 index 0000000..5e24564 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_sample.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 4468441a6f8d1a54c95a33bca10569a2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h b/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h new file mode 100644 index 0000000..10970ef --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h @@ -0,0 +1,251 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define USE_YCOCG 1 + +#define fAutogenEpsilon 0.01f + +// EXPERIMENTAL + +FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 X = colorPreAlpha; + FfxFloat32x3 Y = colorPostAlpha; + FfxFloat32x3 Z = colorPrevPreAlpha; + FfxFloat32x3 W = colorPrevPostAlpha; + + FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); + + // cleanup very small values + retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); + + return retVal; +} + +// works ok: thin edges +FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha = + FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + + FfxFloat32x3 X = colorPrevPreAlpha; + FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha; + FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; + FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; + + FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); + + FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) ); + + // only pixels that have significantly changed in color shuold be considered + retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) ); + + return retVal; +} + +// This function computes the TransparencyAndComposition mask: +// This mask indicates pixels that should discard locks and apply color clamping. +// +// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of +// the MVs can not be guaranteed (e.g. procedutal movement or vegetation that does not have MVs to reduce the cost during rasterization) +// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting. +// +// This function takes a opaque only and a final texture and uses internal copies of those textures from the last frame. +// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency. +// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels. +// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation. +// +// In the final step it stores the current textures in internal textures for the next frame + +FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx); + + // [branch] + if (retVal > FFX_MIN16_F(0.01f)) + { + retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx); + } + return retVal; +} + +float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb; + FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb; + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb); + FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb); + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f); + FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + W = RGBToYCoCg(W); +#endif + minPrev = min(minPrev, W); + maxPrev = max(maxPrev, W); + } + } + // instead of computing the overlap: simply count how many samples are outside + // set reactive based on that + FFX_MIN16_F count = FFX_MIN16_F(0.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + Y = RGBToYCoCg(Y); +#endif + count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + } + } + retVal = count / FFX_MIN16_F(27.f); + + return retVal; +} + + +// This function computes the Reactive mask: +// We want pixels marked where the alpha portion of the frame changes a lot between neighbours +// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...) +// As a result history would not be trustworthy. +// On the other hand we don't want pixels marked where pre-alpha has a large differnce, since those would profit from accumulation +// For mirrors we may assume the pre-alpha is pretty uniform color. +// +// This works well generally, but also marks edge pixels +FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + // we only get here if alpha has a significant contribution and has changed since last frame. + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + // mark pixels with huge variance in alpha as reactive + FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx)); + FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx)); + retVal = ffxSaturate(alphaEdge - opaqueEdge); + + // the above also marks edge pixels due to jitter, so we need to cancel those out + + + return retVal; +} diff --git a/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h.meta new file mode 100644 index 0000000..2195de0 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_tcr_autogen.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 8b62ba5e9686a6545b8ed6a8786322d2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr2/ffx_fsr2_upsample.h b/Shaders/shaders/fsr2/ffx_fsr2_upsample.h new file mode 100644 index 0000000..9287185 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_upsample.h @@ -0,0 +1,191 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_UPSAMPLE_H +#define FFX_FSR2_UPSAMPLE_H + +FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; + +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#if FFX_HALF +void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#endif + +#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE +#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate +#endif + +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) +{ + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} + +#if FFX_HALF +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) +{ + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + + // To Test: Save reciproqual sqrt compute + // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} +#endif + +FfxFloat32 ComputeMaxKernelWeight() { + const FfxFloat32 fKernelSizeBias = 1.0f; + + FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); + + return ffxMin(FfxFloat32(1.99f), fKernelWeight); +} + +FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) +{ + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_begin.h" + #endif + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position + FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... + + #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF + #include "ffx_fsr2_force16_end.h" + #endif + + FfxFloat32x3 fSamples[iLanczos2SampleCount]; + + FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + + FfxInt32x2 offsetTL; + offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); + offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); + + //Load samples + // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. + // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. + // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. + const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; + const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + + FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); + + FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + + const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); + } + } + + FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + + FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); + + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); + const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); + + const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); + + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); + FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); + + // Update rectification box + { + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); + + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + } + } + + RectificationBoxComputeVarianceBoxData(clippingBox); + + fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); + + if (fColorAndWeight.w > FSR2_EPSILON) { + // Normalize for deringing (we need to compare colors) + fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; + fColorAndWeight.w *= fUpsampleLanczosWeightScale; + + Deringing(clippingBox, fColorAndWeight.xyz); + } + + return fColorAndWeight; +} + +#endif //!defined( FFX_FSR2_UPSAMPLE_H ) diff --git a/Shaders/shaders/fsr2/ffx_fsr2_upsample.h.meta b/Shaders/shaders/fsr2/ffx_fsr2_upsample.h.meta new file mode 100644 index 0000000..5c0e011 --- /dev/null +++ b/Shaders/shaders/fsr2/ffx_fsr2_upsample.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 02358daf003aa3d408e0922790429182 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler.meta b/Shaders/shaders/fsr3upscaler.meta new file mode 100644 index 0000000..b538189 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 2c8d127ec4826a54cae3fe1e7e2c17b8 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h new file mode 100644 index 0000000..766cba3 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h @@ -0,0 +1,172 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // Avoid invalid values when accumulation and upsampled weight is 0 + data.fHistoryWeight *= FfxFloat32(data.fHistoryWeight > FSR3UPSCALER_FP16_MIN); + data.fHistoryWeight = ffxMax(FSR3UPSCALER_EPSILON, data.fHistoryWeight + data.fUpsampledWeight); + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + data.fUpsampledColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fUpsampledColor))); + data.fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fHistoryColor))); +#endif + + const FfxFloat32 fAlpha = ffxSaturate(data.fUpsampledWeight / data.fHistoryWeight); + data.fHistoryColor = ffxLerp(data.fHistoryColor, data.fUpsampledColor, fAlpha); + data.fHistoryColor = YCoCgToRGB(data.fHistoryColor); + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + data.fHistoryColor = InverseTonemap(data.fHistoryColor); +#endif +} + +void RectifyHistory( + const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT AccumulationPassData data +) +{ + const FfxFloat32 fVecolityFactor = ffxSaturate(params.f4KVelocity / 20.0f); + const FfxFloat32 fDistanceFactor = ffxSaturate(0.75f - params.fFarthestDepthInMeters / 20.0f); + const FfxFloat32 fAccumulationFactor = 1.0f - params.fAccumulation; + const FfxFloat32 fReactiveFactor = ffxPow(params.fReactiveMask, 1.0f / 2.0f); + const FfxFloat32 fShadingChangeFactor = params.fShadingChange; + const FfxFloat32 fBoxScaleT = ffxMax(fVecolityFactor, ffxMax(fDistanceFactor, ffxMax(fAccumulationFactor, ffxMax(fReactiveFactor, fShadingChangeFactor)))); + + const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); + + const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * fBoxScale; + const FfxFloat32x3 fBoxMin = data.clippingBox.boxCenter - fScaledBoxVec; + const FfxFloat32x3 fBoxMax = data.clippingBox.boxCenter + fScaledBoxVec; + + if (any(FFX_GREATER_THAN(fBoxMin, data.fHistoryColor)) || any(FFX_GREATER_THAN(data.fHistoryColor, fBoxMax))) { + + const FfxFloat32x3 fClampedHistoryColor = clamp(data.fHistoryColor, fBoxMin, fBoxMax); + + const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion); + + // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection + data.fHistoryColor = ffxLerp(fClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution)); + } +} + +void UpdateLockStatus(AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + data.fLock *= FfxFloat32(params.bIsNewSample == false); + + const FfxFloat32 fLifetimeDecreaseFactor = ffxMax(ffxSaturate(params.fShadingChange), ffxMax(params.fReactiveMask, params.fDisocclusion)); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecreaseFactor * fLockMax); + + // Compute this frame lock contribution + data.fLockContributionThisFrame = ffxSaturate(ffxSaturate(data.fLock - fLockThreshold) * (fLockMax - fLockThreshold)); + + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos) * (1.0f - ffxMax(params.fShadingChange * 0, params.fReactiveMask)); + data.fLock = ffxMax(0.0f, ffxMin(data.fLock + fNewLockIntensity, fLockMax)); + + // Preparing for next frame + const FfxFloat32 fLifetimeDecrease = (0.1f / JitterSequenceLength()) * (1.0f - fLifetimeDecreaseFactor); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecrease); + + // we expect similar motion for next frame + // kill lock if that location is outside screen, avoid locks to be clamped to screen borders + const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + data.fLock *= FfxFloat32(IsUvInside(fEstimatedUvNextFrame) == true); +} + +void ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + FfxFloat32 fBaseAccumulation = params.fAccumulation; + + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, params.f4KVelocity / 0.5f)))); + + data.fHistoryWeight = fBaseAccumulation; +} + +void InitPassData(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // Init constant params + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / UpscaleSize(); + params.fHrUv = fHrUv; + params.fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(params.fLrUvJittered, RenderSize(), MaxRenderSize()); + + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.f4KVelocity = Get4KVelocity(params.fMotionVector); + + ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample); + + const FfxFloat32x2 fLumaInstabilityUv_HW = ClampUv(fHrUv, RenderSize(), MaxRenderSize()); + params.fLumaInstabilityFactor = SampleLumaInstability(fLumaInstabilityUv_HW); + + const FfxFloat32x2 fFarthestDepthUv = ClampUv(params.fLrUvJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); + params.fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv); + params.bIsNewSample = (params.bIsExistingSample == false || 0 == FrameIndex()); + + const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]); + params.fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]); + params.fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]); + params.fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); + params.fAccumulation *= FfxFloat32(round(params.fAccumulation * 100.0f) > 1.0f); + + // Init variable data + data = (AccumulationPassData)0; + data.fUpsampledColor = FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryColor = FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryWeight = 1.0f; + data.fUpsampledWeight = 0.0f; + data.fLock = 0.0f; + data.fLockContributionThisFrame = 0.0f; +} + +void Accumulate(FfxInt32x2 iPxHrPos) +{ + AccumulationPassCommonParams params; + AccumulationPassData data; + InitPassData(iPxHrPos, params, data); + + if (params.bIsExistingSample && !params.bIsNewSample) { + ReprojectHistoryColor(params, data); + } + + UpdateLockStatus(params, data); + + ComputeBaseAccumulationWeight(params, data); + + ComputeUpsampledColorAndWeight(params, data); + + RectifyHistory(params, data); + + Accumulate(params, data); + + data.fHistoryColor /= Exposure(); + + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(data.fHistoryColor, data.fLock)); + + // Output final color when RCAS is disabled +#if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0 + StoreUpscaledOutput(iPxHrPos, data.fHistoryColor); +#endif + StoreNewLocks(iPxHrPos, 0); +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta new file mode 100644 index 0000000..d84af26 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: cab06bfc45670ef42915e0e483e2c823 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h new file mode 100644 index 0000000..1c3fc99 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h @@ -0,0 +1,980 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "ffx_fsr3upscaler_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "../ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // FFX_PREFER_WAVE64 + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_FSR3UPSCALER_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_FSR3UPSCALER_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_FSR3UPSCALER_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) +cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) +{ + FfxInt32x2 iRenderSize; + FfxInt32x2 iPreviousFrameRenderSize; + + FfxInt32x2 iUpscaleSize; + FfxInt32x2 iPreviousFrameUpscaleSize; + + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iMaxUpscaleSize; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fPreviousFrameJitter; + + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + + FfxFloat32 fDeltaTime; + FfxFloat32 fDeltaPreExposure; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fFrameIndex; +}; + +#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size. + +/* Define getter functions in the order they are defined in the CB! */ +FfxInt32x2 RenderSize() +{ + return iRenderSize; +} + +FfxInt32x2 PreviousFrameRenderSize() +{ + return iPreviousFrameRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return iMaxRenderSize; +} + +FfxInt32x2 UpscaleSize() +{ + return iUpscaleSize; +} + +FfxInt32x2 PreviousFrameUpscaleSize() +{ + return iPreviousFrameUpscaleSize; +} + +FfxInt32x2 MaxUpscaleSize() +{ + return iMaxUpscaleSize; +} + +FfxFloat32x2 Jitter() +{ + return fJitter; +} + +FfxFloat32x2 PreviousFrameJitter() +{ + return fPreviousFrameJitter; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return fDeviceToViewDepth; +} + +FfxFloat32x2 MotionVectorScale() +{ + return fMotionVectorScale; +} + +FfxFloat32x2 DownscaleFactor() +{ + return fDownscaleFactor; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return fMotionVectorJitterCancellation; +} + +FfxFloat32 TanHalfFoV() +{ + return fTanHalfFOV; +} + +FfxFloat32 JitterSequenceLength() +{ + return fJitterSequenceLength; +} + +FfxFloat32 DeltaTime() +{ + return fDeltaTime; +} + +FfxFloat32 DeltaPreExposure() +{ + return fDeltaPreExposure; +} + +FfxFloat32 ViewSpaceToMetersFactor() +{ + return fViewSpaceToMetersFactor; +} + +FfxFloat32 FrameIndex() +{ + return fFrameIndex; +} + +#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + +#define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p) +#define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p +#define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. + +#define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] +#if defined(FFX_FSR3UPSCALER_EMBED_ROOTSIG) +#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT FFX_FSR3UPSCALER_ROOTSIG +#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR3UPSCALER_CB2_ROOTSIG +#else +#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +#endif // #if FFX_FSR3UPSCALER_EMBED_ROOTSIG + +#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +}; + +FfxFloat32 TcThreshold() +{ + return fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return fReactiveMax; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_RCAS) +cbuffer cbRCAS : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_RCAS) +{ + FfxUInt32x4 rcasConfig; +}; + +FfxUInt32x4 RCASConfig() +{ + return rcasConfig; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS) + + +#if defined(FSR3UPSCALER_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_REACTIVE) +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +}; + +FfxFloat32 GenReactiveScale() +{ + return gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return gen_reactive_flags; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_SPD) +cbuffer cbSPD : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +}; + +FfxUInt32 MipCount() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return renderSize; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) + +// Declare and sample camera buffers as regular textures, unless overridden +#if !defined(UNITY_FSR_TEX2D) +#define UNITY_FSR_TEX2D(type) Texture2D +#endif +#if !defined(UNITY_FSR_RWTEX2D) +#define UNITY_FSR_RWTEX2D(type) RWTexture2D +#endif +#if !defined(UNITY_FSR_POS) +#define UNITY_FSR_POS(pxPos) (pxPos) +#endif +#if !defined(UNITY_FSR_UV) +#define UNITY_FSR_UV(uv) (uv) +#endif +#if !defined(UNITY_FSR_GETDIMS) +#define UNITY_FSR_GETDIMS(tex, w, h) (tex).GetDimensions((w), (h)) +#endif + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + +#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS) +Texture2D r_spd_mips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SPD_MIPS); + +FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel) +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + FfxUInt32 uLevels; + r_spd_mips.GetDimensions(uMipLevel, uWidth, uHeight, uLevels); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return r_spd_mips.SampleLevel(s_LinearClamp, fUV, mipLevel); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) +UNITY_FSR_TEX2D(FfxFloat32) r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH); + +FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) +{ + return r_input_depth[UNITY_FSR_POS(iPxPos)]; +} + +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) +UNITY_FSR_TEX2D(FfxFloat32) r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK); + +FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) +{ + return r_reactive_mask[UNITY_FSR_POS(iPxPos)]; +} + +FfxInt32x2 GetReactiveMaskResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + UNITY_FSR_GETDIMS(r_reactive_mask, uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV) +{ + return r_reactive_mask.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +UNITY_FSR_TEX2D(FfxFloat32) r_transparency_and_composition_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) +{ + return r_transparency_and_composition_mask[UNITY_FSR_POS(iPxPos)]; +} + +FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + UNITY_FSR_GETDIMS(r_transparency_and_composition_mask, uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) +{ + return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) +UNITY_FSR_TEX2D(FfxFloat32x4) r_input_color_jittered : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR); + +FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) +{ + return r_input_color_jittered[UNITY_FSR_POS(iPxPos)].rgb; +} + +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR_UV(fUV), 0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) +UNITY_FSR_TEX2D(FfxFloat32x4) r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS); + +FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[UNITY_FSR_POS(iPxDilatedMotionVectorPos)].xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) +Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED); + +FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) +{ + return r_internal_upscaled_color[iPxHistory]; +} + +FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) +{ + return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) +RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY); + +void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) +{ + rw_luma_history[iPxPos] = fLumaHistory; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) +Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY); + +FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos) +{ + return r_luma_history[iPxPos]; +} + +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) +Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT); + +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ + return r_rcas_input[iPxPos]; +} + +FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV) +{ + return r_rcas_input.SampleLevel(s_LinearClamp, fUV, 0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) +RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED); + +void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + rw_internal_upscaled_color[iPxHistory] = fHistory; +} + +void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + rw_internal_upscaled_color[iPxPos] = fColorAndWeight; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) +UNITY_FSR_RWTEX2D(FfxFloat32x4) rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT); + +void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + rw_upscaled_output[UNITY_FSR_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION) +Texture2D r_accumulation : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_ACCUMULATION); + +FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV) +{ + return r_accumulation.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION) +RWTexture2D rw_accumulation : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_ACCUMULATION); + +void StoreAccumulation(FfxUInt32x2 iPxPos, FfxFloat32 fAccumulation) +{ + rw_accumulation[iPxPos] = fAccumulation; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) +Texture2D r_shading_change : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE); + +FfxFloat32 LoadShadingChange(FfxUInt32x2 iPxPos) +{ + return r_shading_change[iPxPos]; +} + +FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV) +{ + return r_shading_change.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE) +RWTexture2D rw_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE); + +void StoreShadingChange(FfxUInt32x2 iPxPos, FfxFloat32 fShadingChange) +{ + rw_shading_change[iPxPos] = fShadingChange; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) +Texture2D r_farthest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH); + +FfxInt32x2 GetFarthestDepthResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_farthest_depth.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepth(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth[iPxPos]; +} + +FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV) +{ + return r_farthest_depth.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH) +RWTexture2D rw_farthest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH); + +void StoreFarthestDepth(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) +{ + rw_farthest_depth[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) +Texture2D r_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1); + +FfxInt32x2 GetFarthestDepthMip1ResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_farthest_depth_mip1.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepthMip1(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth_mip1[iPxPos]; +} + +FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV) +{ + return r_farthest_depth_mip1.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1) +RWTexture2D rw_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1); + +void StoreFarthestDepthMip1(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) +{ + rw_farthest_depth_mip1[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) +Texture2D r_current_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA); + +FfxFloat32 LoadCurrentLuma(FfxUInt32x2 iPxPos) +{ + return r_current_luma[iPxPos]; +} + +FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv) +{ + return r_current_luma.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA) +RWTexture2D rw_current_luma : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA); + +void StoreCurrentLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) +{ + rw_current_luma[iPxPos] = fLuma; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) +Texture2D r_luma_instability : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY); + +FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv) +{ + return r_luma_instability.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY) +RWTexture2D rw_luma_instability : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY); + +void StoreLumaInstability(FfxUInt32x2 iPxPos, FfxFloat32 fLumaInstability) +{ + rw_luma_instability[iPxPos] = fLumaInstability; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) +Texture2D r_previous_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA); + +FfxFloat32 LoadPreviousLuma(FfxUInt32x2 iPxPos) +{ + return r_previous_luma[iPxPos]; +} + +FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv) +{ + return r_previous_luma.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) +Texture2D r_new_locks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS); + +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) +{ + return r_new_locks[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) +RWTexture2D rw_new_locks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS); + +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) +{ + return rw_new_locks[iPxPos]; +} + +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) +{ + rw_new_locks[iPxPos] = newLock; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + +FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) +{ + return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + +void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = asuint(fDepth); + +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); +#else + InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth +#endif +} + +void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) +{ + rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) +RWTexture2D rw_dilated_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH); + +void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ + rw_dilated_depth[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) +RWTexture2D rw_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS); + +void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ + rw_dilated_motion_vectors[iPxPos] = fMotionVector; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) +Texture2D r_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS); + +FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_dilated_motion_vectors[iPxInput]; +} + +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +{ + return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) +Texture2D r_dilated_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH); + +FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) +{ + return r_dilated_depth[iPxInput]; +} + +FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV) +{ + return r_dilated_depth.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) +Texture2D r_input_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE); + +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} +#endif + +// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) +Texture2D r_lanczos_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT); +#endif + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} +// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) +Texture2D r_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS); + +FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) +RWTexture2D rw_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS); + +void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks) +{ + rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) +UNITY_FSR_TEX2D(FfxFloat32x4) r_input_opaque_only : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY); + +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[UNITY_FSR_POS(iPxPos)].xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) +Texture2D r_input_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR); + +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_pre_alpha[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) +Texture2D r_input_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR); + +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_post_alpha[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \ + defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) + +RWTexture2D rw_output_autoreactive : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE); +RWTexture2D rw_output_autocomposition : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION); + +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + rw_output_autoreactive[iPxPos] = fReactive.x; + + rw_output_autocomposition[iPxPos] = fReactive.y; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR); + +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_pre_alpha[iPxPos] = color; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR); + +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_post_alpha[iPxPos] = color; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO) +RWTexture2D rw_frame_info : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FRAME_INFO); + +FfxFloat32x4 LoadFrameInfo() +{ + return rw_frame_info[FfxInt32x2(0, 0)]; +} + +void StoreFrameInfo(FfxFloat32x4 fInfo) +{ + rw_frame_info[FfxInt32x2(0, 0)] = fInfo; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +Texture2D r_frame_info : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FRAME_INFO); + +FfxFloat32x4 FrameInfo() +{ + return r_frame_info[FfxInt32x2(0, 0)]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + +RWTexture2D rw_spd_mip0 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0); +RWTexture2D rw_spd_mip1 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1); +RWTexture2D rw_spd_mip2 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2); +RWTexture2D rw_spd_mip3 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3); +RWTexture2D rw_spd_mip4 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4); +globallycoherent RWTexture2D rw_spd_mip5 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5); + +FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define LOAD(idx) \ + if (index == idx) \ + { \ + return rw_spd_mip##idx[iPxPos]; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + + return 0; + +#undef LOAD +} + +void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define STORE(idx) \ + if (index == idx) \ + { \ + rw_spd_mip##idx[iPxPos] = outValue; \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + +#undef STORE +} +#endif + +#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC +globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC); + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); +} + +void SPD_ResetAtomicCounter() +{ + rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; +} +#endif + +#endif // #if defined(FFX_GPU) diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta new file mode 100644 index 0000000..aca1bc0 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: aef945ff764c453428c8c4759706228d +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h new file mode 100644 index 0000000..dd479b1 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h @@ -0,0 +1,403 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if !defined(FFX_FSR3UPSCALER_COMMON_H) +#define FFX_FSR3UPSCALER_COMMON_H + +#if defined(FFX_GPU) +#pragma warning(error : 3206) // treat vector truncation warnings as errors +#pragma warning(disable : 3205) // conversion from larger type to smaller +#pragma warning(disable : 3571) // in ffxPow(f, e), f could be negative + +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = FSR3UPSCALER_FP16_MIN; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = FSR3UPSCALER_FP16_MIN; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MAX = 3.402823466e+38f; +FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MIN = 1.175494351e-38f; + +// Reconstructed depth usage +FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = FSR3UPSCALER_EPSILON * 10; + +FfxFloat32 ReconstructedDepthMvPxThreshold(FfxFloat32 fNearestDepthInMeters) +{ + return ffxLerp(0.25f, 0.75f, ffxSaturate(fNearestDepthInMeters / 100.0f)); +} + +// Accumulation +FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 16.0f; +FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; + +#define SHADING_CHANGE_SET_SIZE 5 +FFX_STATIC const FfxInt32 iShadingChangeMipStart = 0; +FFX_STATIC const FfxFloat32 fShadingChangeSamplePow = 1.0f / 1.0f; + + +FFX_STATIC const FfxFloat32 fLockThreshold = 1.0f; +FFX_STATIC const FfxFloat32 fLockMax = 2.0f; + +FFX_STATIC const FfxInt32 REACTIVE = 0; +FFX_STATIC const FfxInt32 DISOCCLUSION = 1; +FFX_STATIC const FfxInt32 SHADING_CHANGE = 2; +FFX_STATIC const FfxInt32 ACCUMULAION = 3; + +FFX_STATIC const FfxInt32 FRAME_INFO_EXPOSURE = 0; +FFX_STATIC const FfxInt32 FRAME_INFO_LOG_LUMA = 1; +FFX_STATIC const FfxInt32 FRAME_INFO_SCENE_AVERAGE_LUMA = 2; + +FfxBoolean TonemapFirstFrame() +{ + const FfxBoolean bEnabled = true; + return FrameIndex() == 0 && bEnabled; +} + +FfxFloat32 AverageLanczosWeightPerFrame() +{ + return 0.74f; +} + +FfxInt32x2 ShadingChangeRenderSize() +{ + return FfxInt32x2(RenderSize() * 0.5f); +} + +FfxInt32x2 ShadingChangeMaxRenderSize() +{ + return FfxInt32x2(MaxRenderSize() * 0.5f); +} + +FfxInt32x2 PreviousFrameShadingChangeRenderSize() +{ + return FfxInt32x2(PreviousFrameRenderSize() * 0.5f); +} + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +FfxFloat32 SceneAverageLuma() +{ + return FrameInfo()[FRAME_INFO_SCENE_AVERAGE_LUMA]; +} +#endif + +// Auto exposure +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; + +struct AccumulationPassCommonParams +{ + FfxInt32x2 iPxHrPos; + FfxFloat32x2 fHrUv; + FfxFloat32x2 fLrUvJittered; + FfxFloat32x2 fLrUv_HwSampler; + FfxFloat32x2 fMotionVector; + FfxFloat32x2 fReprojectedHrUv; + FfxFloat32 f4KVelocity; + FfxFloat32 fDisocclusion; + FfxFloat32 fReactiveMask; + FfxFloat32 fShadingChange; + FfxFloat32 fAccumulation; + FfxFloat32 fLumaInstabilityFactor; + FfxFloat32 fFarthestDepthInMeters; + + FfxBoolean bIsExistingSample; + FfxBoolean bIsNewSample; +}; + +FfxFloat32 Get4KVelocity(FfxFloat32x2 fMotionVector) +{ + return length(fMotionVector * FfxFloat32x2(3840.0f, 2160.0f)); +} + +struct RectificationBox +{ + FfxFloat32x3 boxCenter; + FfxFloat32x3 boxVec; + FfxFloat32x3 aabbMin; + FfxFloat32x3 aabbMax; + FfxFloat32 fBoxCenterWeight; +}; + +struct AccumulationPassData +{ + RectificationBox clippingBox; + FfxFloat32x3 fUpsampledColor; + FfxFloat32 fUpsampledWeight; + FfxFloat32x3 fHistoryColor; + FfxFloat32 fHistoryWeight; + FfxFloat32 fLock; + FfxFloat32 fLockContributionThisFrame; +}; + +void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + rectificationBox.aabbMin = colorSample; + rectificationBox.aabbMax = colorSample; + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter = weightedSample; + rectificationBox.boxVec = colorSample * weightedSample; + rectificationBox.fBoxCenterWeight = fSampleWeight; +} + +void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight) +{ + if (bInitialSample) { + RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight); + } else { + rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample); + rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample); + + FfxFloat32x3 weightedSample = colorSample * fSampleWeight; + rectificationBox.boxCenter += weightedSample; + rectificationBox.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; + } +} + +void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_FP32_MIN) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); + rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; + FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); + rectificationBox.boxVec = stdDev; +} + +FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) +{ + return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); +} + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1, const FfxFloat32 fOnZeroReturnValue) +{ + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : fOnZeroReturnValue; +} + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) +{ + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : 0; +} + +FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) +{ + FfxFloat32x3 fRgb; + + fRgb = FfxFloat32x3( + fYCoCg.x + fYCoCg.y - fYCoCg.z, + fYCoCg.x + fYCoCg.z, + fYCoCg.x - fYCoCg.y - fYCoCg.z); + + return fRgb; +} + +FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) +{ + FfxFloat32x3 fYCoCg; + + fYCoCg = FfxFloat32x3( + 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, + 0.5f * fRgb.r - 0.5f * fRgb.b, + -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); + + return fYCoCg; +} + +FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) +{ + return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); +} + +FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) +{ + FfxFloat32 fLuminance = RGBToLuma(fLinearRgb); + + FfxFloat32 fPercievedLuminance = 0; + if (fLuminance <= 216.0f / 24389.0f) { + fPercievedLuminance = fLuminance * (24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; + } + + return fPercievedLuminance * 0.01f; +} + +FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; +} + +FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) +{ + return fRgb / ffxMax(FSR3UPSCALER_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} + +FfxBoolean IsUvInside(FfxFloat32x2 fUv) +{ + return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); +} + +FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = ffxMax(0, ffxMin(result.x, iTextureSize.x - 1)); + result.y = ffxMax(0, ffxMin(result.y, iTextureSize.y - 1)); + return result; +} + +FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize) +{ + const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; + const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); + const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize); + + return fClampedUv; +} + +FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size) +{ + return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); +} + +FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) +{ + Lavg = exp(Lavg); + + const FfxFloat32 S = 100.0f; //ISO arithmetic speed + const FfxFloat32 K = 12.5f; + FfxFloat32 ExposureISO100 = log2((Lavg * S) / K); + + const FfxFloat32 q = 0.65f; + FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); + + return 1.0f / Lmax; +} + +FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos) +{ + FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * UpscaleSize(); + FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); + return iPxHrPos; +} + +FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize) +{ + return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); +} + +FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + // fDeviceToViewDepth details found in ffx_fsr3upscaler.cpp + return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0])); +} + +FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth) +{ + return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors(); + + const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth); + + const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); + const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z; + const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z; + + return FfxFloat32x3(X, Y, Z); +} + +FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth) +{ + return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor(); +} + +FfxFloat32 GetMaxDistanceInMeters() +{ +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + return GetViewSpaceDepth(0.0f) * ViewSpaceToMetersFactor(); +#else + return GetViewSpaceDepth(1.0f) * ViewSpaceToMetersFactor(); +#endif +} + + +struct BilinearSamplingData +{ + FfxInt32x2 iOffsets[4]; + FfxFloat32 fWeights[4]; + FfxInt32x2 iBasePos; +}; + +BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize) +{ + BilinearSamplingData data; + + FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f); + data.iBasePos = FfxInt32x2(floor(fPxSample)); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + data.iOffsets[0] = FfxInt32x2(0, 0); + data.iOffsets[1] = FfxInt32x2(1, 0); + data.iOffsets[2] = FfxInt32x2(0, 1); + data.iOffsets[3] = FfxInt32x2(1, 1); + + data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y); + data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y); + data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y); + + return data; +} + +struct PlaneData +{ + FfxFloat32x3 fNormal; + FfxFloat32 fDistanceFromOrigin; +}; + +PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2) +{ + PlaneData plane; + + FfxFloat32x3 v0 = fP0 - fP1; + FfxFloat32x3 v1 = fP0 - fP2; + plane.fNormal = normalize(cross(v0, v1)); + plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); + + return plane; +} + +FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint) +{ + return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin); +} + +#endif // #if defined(FFX_GPU) + +#endif //!defined(FFX_FSR3UPSCALER_COMMON_H) diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta new file mode 100644 index 0000000..c735997 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 16bb355abb68c044983f42086cf3eb7e +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h new file mode 100644 index 0000000..6f4fa33 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h @@ -0,0 +1,159 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +struct FfxDebugViewport +{ + FfxInt32x2 offset; + FfxInt32x2 size; +}; + +// Macro to cull and draw debug viewport +#define DRAW_VIEWPORT(function, pos, vp) \ + { \ + if (pointIsInsideViewport(pos, vp)) \ + { \ + function(pos, vp); \ + } \ + } + +FfxFloat32x2 getTransformedUv(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = (FfxFloat32x2(iPxPos - vp.offset) + 0.5f) / vp.size; + + return fUv; +} + +FfxFloat32x3 getMotionVectorColor(FfxFloat32x2 fMotionVector) +{ + return FfxFloat32x3(0.5f + fMotionVector * RenderSize() * 0.5f, 0.5f); +} + +FfxFloat32x4 getUnusedIndicationColor(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxInt32x2 basePos = iPxPos - vp.offset; + + FfxFloat32 ar = FfxFloat32(vp.size.x) / FfxFloat32(vp.size.y); + + return FfxFloat32x4(basePos.x == FfxInt32(basePos.y * ar), 0, 0, 1); +} + +void drawDilatedMotionVectors(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32x2 fMotionVector = SampleDilatedMotionVector(fUv_HW); + + StoreUpscaledOutput(iPxPos, getMotionVectorColor(fMotionVector)); +} + +void drawDisocclusionMask(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fDisocclusionFactor = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[DISOCCLUSION]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fDisocclusionFactor, 0)); +} + +void drawDetailProtectionTakedown(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fProtectionTakedown = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[REACTIVE]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fProtectionTakedown, 0)); +} + +void drawReactiveness(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + FfxFloat32 fShadingChange = ffxSaturate(SampleDilatedReactiveMasks(fUv_HW)[SHADING_CHANGE]); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fShadingChange, 0)); +} + +void drawProtectedAreas(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32 fProtection = ffxSaturate(SampleHistory(fUv).w - fLockThreshold); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(fProtection, 0, 0)); +} + +void drawDilatedDepthInMeters(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp); + + FfxFloat32x2 fUv_HW = ClampUv(fUv, RenderSize(), MaxRenderSize()); + + const FfxFloat32 fDilatedDepth = SampleDilatedDepth(fUv_HW); + const FfxFloat32 fDepthInMeters = GetViewSpaceDepthInMeters(fDilatedDepth); + + StoreUpscaledOutput(iPxPos, FfxFloat32x3(ffxSaturate(fDepthInMeters / 25.0f), 0, 0)); +} + +FfxBoolean pointIsInsideViewport(FfxInt32x2 iPxPos, FfxDebugViewport vp) +{ + FfxInt32x2 extent = vp.offset + vp.size; + + return (iPxPos.x >= vp.offset.x && iPxPos.x < extent.x) && (iPxPos.y >= vp.offset.y && iPxPos.y < extent.y); +} + +void DebugView(FfxInt32x2 iPxPos) +{ +#define VIEWPORT_GRID_SIZE_X 3 +#define VIEWPORT_GRID_SIZE_Y 3 + + FfxFloat32x2 fViewportScale = FfxFloat32x2(1.0f / VIEWPORT_GRID_SIZE_X, 1.0f / VIEWPORT_GRID_SIZE_Y); + FfxInt32x2 iViewportSize = FfxInt32x2(UpscaleSize() * fViewportScale); + + // compute grid [y][x] for easier placement of viewports + FfxDebugViewport vp[VIEWPORT_GRID_SIZE_Y][VIEWPORT_GRID_SIZE_X]; + for (FfxInt32 y = 0; y < VIEWPORT_GRID_SIZE_Y; y++) + { + for (FfxInt32 x = 0; x < VIEWPORT_GRID_SIZE_X; x++) + { + vp[y][x].offset = iViewportSize * FfxInt32x2(x, y); + vp[y][x].size = iViewportSize; + } + } + + // top row + DRAW_VIEWPORT(drawDilatedMotionVectors, iPxPos, vp[0][0]); + DRAW_VIEWPORT(drawProtectedAreas, iPxPos, vp[0][1]); + DRAW_VIEWPORT(drawDilatedDepthInMeters, iPxPos, vp[0][2]); + + // bottom row + DRAW_VIEWPORT(drawDisocclusionMask, iPxPos, vp[2][0]); + DRAW_VIEWPORT(drawReactiveness, iPxPos, vp[2][1]); + DRAW_VIEWPORT(drawDetailProtectionTakedown, iPxPos, vp[2][2]); +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta new file mode 100644 index 0000000..3ec32db --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 9c5c756c93605b6428eaae6c8aaabbe5 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h new file mode 100644 index 0000000..624b7ca --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h @@ -0,0 +1,115 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +struct LumaInstabilityFactorData +{ + FfxFloat32x4 fLumaHistory; + FfxFloat32 fLumaInstabilityFactor; +}; + +LumaInstabilityFactorData ComputeLumaInstabilityFactor(LumaInstabilityFactorData data, FfxFloat32 fCurrentFrameLuma, FfxFloat32 fFarthestDepthInMeters) +{ + const FfxInt32 N_MINUS_1 = 0; + const FfxInt32 N_MINUS_2 = 1; + const FfxInt32 N_MINUS_3 = 2; + const FfxInt32 N_MINUS_4 = 3; + + FfxFloat32 fLumaInstability = 0.0f; + const FfxFloat32 fDiffs0 = (fCurrentFrameLuma - data.fLumaHistory[N_MINUS_1]); + const FfxFloat32 fSimilarity0 = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[N_MINUS_1], 1.0f); + + FfxFloat32 fMaxSimilarity = fSimilarity0; + + if (fSimilarity0 < 1.0f) { + for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { + const FfxFloat32 fDiffs1 = (fCurrentFrameLuma - data.fLumaHistory[i]); + const FfxFloat32 fSimilarity1 = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[i]); + + if (sign(fDiffs0) == sign(fDiffs1)) { + + fMaxSimilarity = ffxMax(fMaxSimilarity, fSimilarity1); + } + } + + fLumaInstability = FfxFloat32(fMaxSimilarity > fSimilarity0); + } + + // Shift history + data.fLumaHistory[N_MINUS_4] = data.fLumaHistory[N_MINUS_3]; + data.fLumaHistory[N_MINUS_3] = data.fLumaHistory[N_MINUS_2]; + data.fLumaHistory[N_MINUS_2] = data.fLumaHistory[N_MINUS_1]; + data.fLumaHistory[N_MINUS_1] = fCurrentFrameLuma; + + data.fLumaHistory /= Exposure(); + + data.fLumaInstabilityFactor = fLumaInstability * FfxFloat32(data.fLumaHistory[N_MINUS_4] != 0); + + return data; +} + +void LumaInstability(FfxInt32x2 iPxPos) +{ + LumaInstabilityFactorData data; + data.fLumaInstabilityFactor = 0.0f; + data.fLumaHistory = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxPos); + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize(); + const FfxFloat32x2 fUvCurrFrameJittered = fUv + Jitter() / RenderSize(); + const FfxFloat32x2 fUvPrevFrameJittered = fUv + PreviousFrameJitter() / PreviousFrameRenderSize(); + const FfxFloat32x2 fReprojectedUv = fUvPrevFrameJittered + fDilatedMotionVector; + + if (IsUvInside(fReprojectedUv)) + { + const FfxFloat32x2 fUvReactive_HW = ClampUv(fUvCurrFrameJittered, RenderSize(), MaxRenderSize()); + + const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(fUvReactive_HW); + const FfxFloat32 fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]); + const FfxFloat32 fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]); + const FfxFloat32 fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]); + const FfxFloat32 fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); + + const FfxBoolean bAccumulationFactor = fAccumulation > 0.9f; + + const FfxBoolean bComputeInstability = bAccumulationFactor; + + if (bComputeInstability) { + + const FfxFloat32x2 fUv_HW = ClampUv(fUvCurrFrameJittered, RenderSize(), MaxRenderSize()); + const FfxFloat32 fCurrentFrameLuma = SampleCurrentLuma(fUv_HW) * Exposure(); + + const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize()); + data.fLumaHistory = SampleLumaHistory(fReprojectedUv_HW) * DeltaPreExposure() * Exposure(); + + const FfxFloat32x2 fFarthestDepthUv_HW = ClampUv(fUvCurrFrameJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); + const FfxFloat32 fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv_HW); + + data = ComputeLumaInstabilityFactor(data, fCurrentFrameLuma, fFarthestDepthInMeters); + + const FfxFloat32 fVelocityWeight = 1.0f - ffxSaturate(Get4KVelocity(fDilatedMotionVector) / 20.0f); + data.fLumaInstabilityFactor *= fVelocityWeight * (1.0f - fDisocclusion) * (1.0f - fReactiveMask) * (1.0f - fShadingChange); + } + } + + StoreLumaHistory(iPxPos, data.fLumaHistory); + StoreLumaInstability(iPxPos, data.fLumaInstabilityFactor); +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta new file mode 100644 index 0000000..7df8e8b --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: c34dbb67d00358d4b9de0a9fb0e0ace2 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h new file mode 100644 index 0000000..e8a8c49 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h @@ -0,0 +1,192 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FFX_STATIC const FfxInt32 LOG_LUMA = 0; +FFX_STATIC const FfxInt32 LUMA = 1; +FFX_STATIC const FfxInt32 DEPTH_IN_METERS = 2; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + const FfxFloat32 fLuma = LoadCurrentLuma(iPxSamplePos); + const FfxFloat32 fLogLuma = ffxMax(FSR3UPSCALER_EPSILON, log(fLuma)); + const FfxFloat32 fFarthestDepthInMeters = LoadFarthestDepth(iPxSamplePos); + + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[LOG_LUMA] = fLogLuma; + fOutput[LUMA] = fLuma; + fOutput[DEPTH_IN_METERS] = fFarthestDepthInMeters; + + return fOutput; +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == 5) + { + StorePyramid(pix, outValue.xy, index); + } + else if (index == 0) { + StoreFarthestDepthMip1(pix, outValue[DEPTH_IN_METERS]); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) + { + FfxFloat32x4 frameInfo = LoadFrameInfo(); + const FfxFloat32 fSceneAvgLuma = outValue[LUMA]; + const FfxFloat32 fPrevLogLuma = frameInfo[FRAME_INFO_LOG_LUMA]; + FfxFloat32 fLogLuma = outValue[LOG_LUMA]; + + if (fPrevLogLuma < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + fLogLuma = fPrevLogLuma + (fLogLuma - fPrevLogLuma) * (1.0f - exp(-DeltaTime())); + fLogLuma = ffxMax(0.0f, fLogLuma); + } + + frameInfo[FRAME_INFO_EXPOSURE] = ComputeAutoExposureFromLavg(fLogLuma); + frameInfo[FRAME_INFO_LOG_LUMA] = fLogLuma; + frameInfo[FRAME_INFO_SCENE_AVERAGE_LUMA] = fSceneAvgLuma; + + StoreFrameInfo(frameInfo); + } + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +#endif + +// define fetch and store functions Packed +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "../spd/ffx_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta new file mode 100644 index 0000000..72f45f7 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 5138e351a00fc914f973d2495f9aa07f +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h new file mode 100644 index 0000000..59c765b --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h @@ -0,0 +1,152 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector) +{ + const FfxFloat32 fNearestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(fDepth), FSR3UPSCALER_FP16_MAX); + const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters); + + // Discard small mvs + fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold); + + const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / RenderSize(); + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + // Project current depth into previous frame locations. + // Push to all pixels having some contribution if reprojection is using bilinear logic. + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, RenderSize())) { + StoreReconstructedDepth(iStorePos, fDepth); + } + } + } +} + +struct DepthExtents +{ + FfxFloat32 fNearest; + FfxInt32x2 fNearestCoord; + FfxFloat32 fFarthest; +}; + +DepthExtents FindDepthExtents(FFX_PARAMETER_IN FfxInt32x2 iPxPos) +{ + DepthExtents extents; + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(+0, +1), + FfxInt32x2(+0, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+1, +1), + FfxInt32x2(-1, -1), + FfxInt32x2(+1, -1), + }; + + // pull out the depth loads to allow SC to batch them + FfxFloat32 depth[9]; + FfxInt32 iSampleIndex = 0; + FFX_UNROLL + for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + depth[iSampleIndex] = LoadInputDepth(iPos); + } + + // find closest depth + extents.fNearestCoord = iPxPos; + extents.fNearest = depth[0]; + extents.fFarthest = depth[0]; + FFX_UNROLL + for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { + + const FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + if (IsOnScreen(iPos, RenderSize())) { + + FfxFloat32 fNdDepth = depth[iSampleIndex]; +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + if (fNdDepth > extents.fNearest) { + extents.fFarthest = ffxMin(extents.fFarthest, fNdDepth); +#else + if (fNdDepth < extents.fNearest) { + extents.fFarthest = ffxMax(extents.fFarthest, fNdDepth); +#endif + extents.fNearestCoord = iPos; + extents.fNearest = fNdDepth; + } + } + } + + return extents; +} + +FfxFloat32x2 DilateMotionVector(FfxInt32x2 iPxPos, const DepthExtents depthExtents) +{ +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS + const FfxInt32x2 iSamplePos = iPxPos; + const FfxInt32x2 iMotionVectorPos = depthExtents.fNearestCoord; +#else + const FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); + const FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(depthExtents.fNearestCoord); +#endif + + const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); + + return fDilatedMotionVector; +} + +FfxFloat32 GetCurrentFrameLuma(FfxInt32x2 iPxPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxPos)); + const FfxFloat32 fLuma = RGBToLuma(fRgb); + + return fLuma; +} + +void PrepareInputs(FfxInt32x2 iPxPos) +{ + const DepthExtents depthExtents = FindDepthExtents(iPxPos); + const FfxFloat32x2 fDilatedMotionVector = DilateMotionVector(iPxPos, depthExtents); + + ReconstructPrevDepth(iPxPos, depthExtents.fNearest, fDilatedMotionVector); + + StoreDilatedMotionVector(iPxPos, fDilatedMotionVector); + StoreDilatedDepth(iPxPos, depthExtents.fNearest); + + const FfxFloat32 fFarthestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(depthExtents.fFarthest), FSR3UPSCALER_FP16_MAX); + StoreFarthestDepth(iPxPos, fFarthestDepthInMeters); + + const FfxFloat32 fLuma = GetCurrentFrameLuma(iPxPos); + StoreCurrentLuma(iPxPos, fLuma); +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta new file mode 100644 index 0000000..9e7a5db --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: b08f1fd2bbf082242b930cb94652872c +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h new file mode 100644 index 0000000..fa9571d --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h @@ -0,0 +1,270 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FfxFloat32 ComputeDisocclusions(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthViewSpace) +{ + const FfxFloat32 fNearestDepthInMeters = ffxMin(fCurrentDepthViewSpace * ViewSpaceToMetersFactor(), FSR3UPSCALER_FP16_MAX); + const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters); + + fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold); + + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + FfxFloat32 fDisocclusion = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + FfxBoolean bPotentialDisocclusion = true; + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4 && bPotentialDisocclusion; iSampleIndex++) + { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = ClampLoad(bilinearInfo.iBasePos, iOffset, FfxInt32x2(RenderSize())); + + if (IsOnScreen(iSamplePos, RenderSize())) { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(LoadReconstructedPrevDepth(iSamplePos)); + const FfxFloat32 fDepthDifference = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + + bPotentialDisocclusion = bPotentialDisocclusion && (fDepthDifference > FSR3UPSCALER_FP32_MIN); + + if (bPotentialDisocclusion) { + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()) * 0.5f); + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 fRequiredDepthSeparation = Ksep * fHalfViewportWidth * fDepthThreshold; + + fDisocclusion += ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDifference)) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + fDisocclusion = (bPotentialDisocclusion && fWeightSum > 0) ? ffxSaturate(1.0f - fDisocclusion / fWeightSum) : 0.0f; + + return fDisocclusion; +} + +FfxFloat32 ComputeMotionDivergence(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthSample) +{ + const FfxInt32x2 iPxReprojectedPos = FfxInt32x2((fUv + fMotionVector) * RenderSize()); + const FfxFloat32 fReprojectedDepth = LoadDilatedDepth(iPxReprojectedPos); + const FfxFloat32x2 fReprojectedMotionVector = LoadDilatedMotionVector(iPxReprojectedPos); + + const FfxFloat32 fReprojectedVelocity = Get4KVelocity(fReprojectedMotionVector); + const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector); + + const FfxFloat32 fMaxLen = max(length(fMotionVector), length(fReprojectedMotionVector)); + + const FfxFloat32 fNucleusDepthInMeters = GetViewSpaceDepthInMeters(fReprojectedDepth); + const FfxFloat32 fCurrentDepthInMeters = GetViewSpaceDepthInMeters(fCurrentDepthSample); + + const FfxFloat32 fDistanceFactor = MinDividedByMax(fNucleusDepthInMeters, fCurrentDepthInMeters); + const FfxFloat32 fVelocityFactor = ffxSaturate(f4KVelocity / 10.0f); + const FfxFloat32 fMotionVectorFieldConfidence = (1.0f - ffxSaturate(fReprojectedVelocity / f4KVelocity)) * fDistanceFactor * fVelocityFactor; + + return fMotionVectorFieldConfidence; +} + +FfxFloat32 DilateReactiveMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv) +{ + FfxFloat32 fDilatedReactiveMasks = 0.0f; + + FFX_UNROLL + for (FfxInt32 y = -1; y <=1; y++) + { + FFX_UNROLL + for (FfxInt32 x = -1; x <= 1; x++) + { + const FfxInt32x2 sampleCoord = ClampLoad(iPxPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + fDilatedReactiveMasks = ffxMax(fDilatedReactiveMasks, LoadReactiveMask(sampleCoord)); + } + } + + return fDilatedReactiveMasks; +} + +FfxFloat32 DilateTransparencyAndCompositionMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv) +{ + const FfxFloat32x2 fUvTransparencyAndCompositionMask = ClampUv(fUv, RenderSize(), GetTransparencyAndCompositionMaskResourceDimensions()); + return SampleTransparencyAndCompositionMask(fUvTransparencyAndCompositionMask); +} + +FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 iPxPos) +{ + /* + 1 2 3 + 4 0 5 + 6 7 8 + */ + + const FfxInt32 iNucleusIndex = 0; + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(-1, -1), + FfxInt32x2(+0, -1), + FfxInt32x2(+1, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+0, +1), + FfxInt32x2(+1, +1), + }; + + FfxFloat32 fSamples[iSampleCount]; + + FfxFloat32 fLumaMin = FSR3UPSCALER_FP32_MAX; + FfxFloat32 fLumaMax = FSR3UPSCALER_FP32_MIN; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + const FfxInt32x2 iPxSamplePos = ClampLoad(iPxPos, iSampleOffsets[iSampleIndex], FfxInt32x2(RenderSize())); + fSamples[iSampleIndex] = LoadCurrentLuma(iPxSamplePos) * Exposure(); + + fLumaMin = ffxMin(fLumaMin, fSamples[iSampleIndex]); + fLumaMax = ffxMax(fLumaMax, fSamples[iSampleIndex]); + } + + const FfxFloat32 fThreshold = 0.9f; + FfxFloat32 fDissimilarLumaMin = FSR3UPSCALER_FP32_MAX; + FfxFloat32 fDissimilarLumaMax = 0; + +#define SETBIT(x) (1U << x) + + FfxUInt32 uPatternMask = SETBIT(iNucleusIndex); // Flag nucleus as similar + + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { + SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(iNucleusIndex), // Upper left + SETBIT(2) | SETBIT(3) | SETBIT(5) | SETBIT(iNucleusIndex), // Upper right + SETBIT(4) | SETBIT(6) | SETBIT(7) | SETBIT(iNucleusIndex), // Lower left + SETBIT(5) | SETBIT(7) | SETBIT(8) | SETBIT(iNucleusIndex) // Lower right + }; + + FfxInt32 iBitIndex = 1; + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex, ++iBitIndex) { + + const FfxFloat32 fDifference = abs(fSamples[iSampleIndex] - fSamples[iNucleusIndex]) / (fLumaMax - fLumaMin); + + if (fDifference < fThreshold) + { + uPatternMask |= SETBIT(iBitIndex); + } + else + { + fDissimilarLumaMin = ffxMin(fDissimilarLumaMin, fSamples[iSampleIndex]); + fDissimilarLumaMax = ffxMax(fDissimilarLumaMax, fSamples[iSampleIndex]); + } + } + + const FfxBoolean bIsRidge = fSamples[iNucleusIndex] > fDissimilarLumaMax || fSamples[iNucleusIndex] < fDissimilarLumaMin; + + if (FFX_FALSE == bIsRidge) + { + return 0.0f; + } + + FFX_UNROLL + for (FfxInt32 i = 0; i < uNumRejectionMasks; i++) + { + if ((uPatternMask & uRejectionMasks[i]) == uRejectionMasks[i]) + { + return 0.0f; + } + } + + return 1.0f - fLumaMin / fLumaMax; +} + +FfxFloat32 UpdateAccumulation(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fDisocclusion, FfxFloat32 fShadingChange) +{ + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + FfxFloat32 fAccumulation = 0.0f; + + if (IsUvInside(fReprojectedUv)) { + const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize()); + fAccumulation = ffxSaturate(SampleAccumulation(fReprojectedUv_HW)); + } + + fAccumulation = ffxLerp(fAccumulation, 0.0f, fShadingChange); + fAccumulation = ffxLerp(fAccumulation, ffxMin(fAccumulation, 0.25f), fDisocclusion); + + fAccumulation *= FfxFloat32(round(fAccumulation * 100.0f) > 1.0f); + + // Update for next frame, normalize to store in unorm + const FfxFloat32 fAccumulatedFramesMax = 3.0f; + const FfxFloat32 fAccumulatedFramesToStore = ffxSaturate(fAccumulation + (1.0f / fAccumulatedFramesMax)); + StoreAccumulation(iPxPos, fAccumulatedFramesToStore); + + return fAccumulation; +} + +FfxFloat32 ComputeShadingChange(FfxFloat32x2 fUv) +{ + // NOTE: Here we re-apply jitter, will be reverted again when sampled in accumulation pass + const FfxFloat32x2 fShadingChangeUv = ClampUv(fUv - Jitter() / RenderSize(), ShadingChangeRenderSize(), ShadingChangeMaxRenderSize()); + const FfxFloat32 fShadingChange = ffxSaturate(SampleShadingChange(fShadingChangeUv)); + + return fShadingChange; +} + +void PrepareReactivity(FfxInt32x2 iPxPos) +{ + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize(); + const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + + // Discard small mvs + const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector); + + const FfxFloat32x2 fDilatedUv = fUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fDepthInMeters = GetViewSpaceDepthInMeters(fDilatedDepth); + + const FfxFloat32 fDisocclusion = ComputeDisocclusions(fUv, fMotionVector, GetViewSpaceDepth(fDilatedDepth)); + const FfxFloat32 fShadingChange = ffxMax(DilateReactiveMasks(iPxPos, fUv), ComputeShadingChange(fUv)); + + const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(fUv, fMotionVector, fDilatedDepth); + const FfxFloat32 fDilatedTransparencyAndComposition = DilateTransparencyAndCompositionMasks(iPxPos, fUv); + const FfxFloat32 fFinalReactiveness = ffxMax(fMotionDivergence, fDilatedTransparencyAndComposition); + + const FfxFloat32 fAccumulation = UpdateAccumulation(iPxPos, fUv, fMotionVector, fDisocclusion, fShadingChange); + + FfxFloat32x4 fOutput; + fOutput[REACTIVE] = fFinalReactiveness; + fOutput[DISOCCLUSION] = fDisocclusion; + fOutput[SHADING_CHANGE] = fShadingChange; + fOutput[ACCUMULAION] = fAccumulation; + + StoreDilatedReactiveMasks(iPxPos, fOutput); + + const FfxFloat32 fLockStrength = ComputeThinFeatureConfidence(iPxPos); + if (fLockStrength > (1.0f / 100.0f)) + { + StoreNewLocks(ComputeHrPosFromLrPos(FfxInt32x2(iPxPos)), fLockStrength); + } +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta new file mode 100644 index 0000000..46fcfc6 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 0070b6300195a7649a899fe0ed099c82 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h new file mode 100644 index 0000000..90a85b3 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h @@ -0,0 +1,67 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); +} + +#define FSR_RCAS_F 1 +FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) +{ + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb *= Exposure(); + + return fColor; +} +void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + +#include "../fsr1/ffx_fsr1.h" + +void CurrFilter(FFX_MIN16_U2 pos) +{ + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c /= Exposure(); + + WriteUpscaledOutput(pos, c); +} + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta new file mode 100644 index 0000000..f21aeb7 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: dc2e80d2251e46c4d9af5696c3061fab +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h new file mode 100644 index 0000000..153a9b7 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h @@ -0,0 +1,64 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference +#endif + +FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) +{ + return LoadHistory(iPxSample); +} + +DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) +DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) + +FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) +{ +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); +#else + const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); +#endif + + return fDilatedMotionVector; +} + +void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +{ + fReprojectedHrUv = params.fHrUv + params.fMotionVector; + + bIsExistingSample = IsUvInside(fReprojectedHrUv); +} + +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + const FfxFloat32x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, PreviousFrameUpscaleSize()); + + data.fHistoryColor = fReprojectedHistory.rgb; + data.fHistoryColor *= DeltaPreExposure(); + data.fHistoryColor *= Exposure(); + + data.fHistoryColor = RGBToYCoCg(data.fHistoryColor); + + data.fLock = fReprojectedHistory.w; +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta new file mode 100644 index 0000000..cb9c6d7 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 2bcf78cbd908b664cb9c656e28b430d4 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h new file mode 100644 index 0000000..b3d8ddb --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h @@ -0,0 +1,100 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR3UPSCALER_RESOURCES_H +#define FFX_FSR3UPSCALER_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION 12 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT 19 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1 20 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2 21 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 26 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS 27 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 27 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_1 28 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_2 29 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_3 30 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_4 31 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5 32 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 33 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO 34 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE 35 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED 36 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 37 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 38 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1 40 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2 41 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE 42 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH 43 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1 44 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA 45 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA 46 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY 48 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1 49 + + +// Shading change detection mip level setting, value must be in the range [FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +//#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +//#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT 60 + +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER 0 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_COUNT 4 + +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR3UPSCALER_RESOURCES_H ) diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta new file mode 100644 index 0000000..591d1d8 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 0a6f299994574d641a6a2f864d6b78b7 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h new file mode 100644 index 0000000..5f727b1 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h @@ -0,0 +1,602 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR3UPSCALER_SAMPLE_H +#define FFX_FSR3UPSCALER_SAMPLE_H + +// suppress warnings +#ifdef FFX_HLSL +#pragma warning(disable: 4008) // potentially divide by zero +#endif //FFX_HLSL + +struct FetchedBilinearSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; +}; + +struct FetchedBicubicSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + FfxFloat32x4 fColor20; + FfxFloat32x4 fColor30; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; + FfxFloat32x4 fColor21; + FfxFloat32x4 fColor31; + + FfxFloat32x4 fColor02; + FfxFloat32x4 fColor12; + FfxFloat32x4 fColor22; + FfxFloat32x4 fColor32; + + FfxFloat32x4 fColor03; + FfxFloat32x4 fColor13; + FfxFloat32x4 fColor23; + FfxFloat32x4 fColor33; +}; + +#if FFX_HALF +struct FetchedBilinearSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; +}; + +struct FetchedBicubicSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + FFX_MIN16_F4 fColor20; + FFX_MIN16_F4 fColor30; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; + FFX_MIN16_F4 fColor21; + FFX_MIN16_F4 fColor31; + + FFX_MIN16_F4 fColor02; + FFX_MIN16_F4 fColor12; + FFX_MIN16_F4 fColor22; + FFX_MIN16_F4 fColor32; + + FFX_MIN16_F4 fColor03; + FFX_MIN16_F4 fColor13; + FFX_MIN16_F4 fColor23; + FFX_MIN16_F4 fColor33; +}; +#else //FFX_HALF +#define FetchedBicubicSamplesMin16 FetchedBicubicSamples +#define FetchedBilinearSamplesMin16 FetchedBilinearSamples +#endif //FFX_HALF + +FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) +{ + return A + (B - A) * t; +} + +FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) +{ + return A + (B - A) * t; +} + +FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} +#endif + +FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) +{ + const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants + return abs(x) < FSR3UPSCALER_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); +} + +FfxFloat32 Lanczos2(FfxFloat32 x) +{ + x = ffxMin(abs(x), 2.0f); + return Lanczos2NoClamp(x); +} + +#if FFX_HALF + +#if 0 +FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) +{ + const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants + return abs(x) < FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); +} +#endif + +FFX_MIN16_F Lanczos2(FFX_MIN16_F x) +{ + x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); + return FFX_MIN16_F(Lanczos2NoClamp(x)); +} +#endif //FFX_HALF + +// FSR1 lanczos approximation. Input is x*x and must be <= 4. +FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) +{ + FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; + FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; + return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) +{ + FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1); + FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); + return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) +{ + x2 = ffxMin(x2, 4.0f); + return Lanczos2ApproxSqNoClamp(x2); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); + return Lanczos2ApproxSqNoClamp(x2); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2Approx(FfxFloat32 x) +{ + return Lanczos2ApproxSq(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) +{ + return Lanczos2ApproxSq(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) +{ + return SampleLanczos2Weight(abs(x)); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) +{ + return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#if FFX_HALF +FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif + +FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + +FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + + +FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif + +// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. +FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = ffxMax(1, ffxMin(result.x, iTextureSize.x - 2)); + result.y = ffxMax(1, ffxMin(result.y, iTextureSize.y - 2)); + return result; +} +#if FFX_HALF +FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) +{ + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = ffxMax(FFX_MIN16_I(1), ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(2))); + result.y = ffxMax(FFX_MIN16_I(1), ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(2))); + return result; +} +#endif //FFX_HALF + + +#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ + Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ + Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ + \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ + \ + Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ + Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ + \ + Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ + Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ + Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ + Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ + \ + return Samples; \ + } + +#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + return Samples; \ + } + +#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) +// is common, so iPxSample can "jitter" +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x-1), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y-1), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define FFX_FSR3UPSCALER_CONCAT_ID(x, y) x ## y +#define FFX_FSR3UPSCALER_CONCAT(x, y) FFX_FSR3UPSCALER_CONCAT_ID(x, y) +#define FFX_FSR3UPSCALER_SAMPLER_1D_0 Lanczos2 +#define FFX_FSR3UPSCALER_SAMPLER_1D_1 Lanczos2LUT +#define FFX_FSR3UPSCALER_SAMPLER_1D_2 Lanczos2Approx + +#define FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(x) FFX_FSR3UPSCALER_CONCAT(FFX_FSR3UPSCALER_SAMPLER_1D_, x) + +#endif //!defined( FFX_FSR3UPSCALER_SAMPLE_H ) diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta new file mode 100644 index 0000000..1e2fde8 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 0c406ac147d40644fab8636a35029185 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h new file mode 100644 index 0000000..2eb23aa --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h @@ -0,0 +1,68 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_STATIC const FfxInt32 s_MipLevelsToUse = 3; + +struct ShadingChangeLumaInfo +{ + FfxFloat32 fSamples[s_MipLevelsToUse]; +}; + +ShadingChangeLumaInfo ComputeShadingChangeLuma(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, const FfxInt32x2 iCurrentSize) +{ + ShadingChangeLumaInfo info; + + const FfxFloat32x2 fMipUv = ClampUv(fUv, ShadingChangeRenderSize(), GetSPDMipDimensions(0)); + + FFX_UNROLL + for (FfxInt32 iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) { + + const FfxFloat32x2 fSample = SampleSPDMipLevel(fMipUv, iMipLevel); + + info.fSamples[iMipLevel] = abs(fSample.x * fSample.y); + } + + return info; +} + +void ShadingChange(FfxInt32x2 iPxPos) +{ + if (IsOnScreen(FfxInt32x2(iPxPos), ShadingChangeRenderSize())) { + + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / ShadingChangeRenderSize(); + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + + const ShadingChangeLumaInfo info = ComputeShadingChangeLuma(iPxPos, fUvJittered, ShadingChangeRenderSize()); + + const FfxFloat32 fScale = 1.0f + iShadingChangeMipStart / s_MipLevelsToUse; + FfxFloat32 fShadingChange = 0.0f; + FFX_UNROLL + for (int iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) + { + if (info.fSamples[iMipLevel] > 0) { + fShadingChange = ffxMax(fShadingChange, info.fSamples[iMipLevel]) * fScale; + } + } + + StoreShadingChange(iPxPos, ffxSaturate(fShadingChange)); + } +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta new file mode 100644 index 0000000..b2c234a --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: d20f9266c072698448f3b51d3d478520 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h new file mode 100644 index 0000000..651c5b3 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h @@ -0,0 +1,299 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FFX_STATIC const FfxInt32 DIFFERENCE = 0; +FFX_STATIC const FfxInt32 SIGN_SUM = 1; +FFX_STATIC const FfxInt32 MIP0_INDICATOR = 2; + +FfxFloat32x2 Sort2(FfxFloat32x2 v) +{ + return FfxFloat32x2(ffxMin(v.x, v.y), ffxMax(v.x, v.y)); +} + +struct SampleSet +{ + FfxFloat32 fSamples[SHADING_CHANGE_SET_SIZE]; +}; + +#define CompareSwap(i, j) \ +{ \ +FfxFloat32 fTmp = ffxMin(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[j] = ffxMax(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[i] = fTmp;\ +} + +#if SHADING_CHANGE_SET_SIZE == 5 +FFX_STATIC const FfxInt32x2 iSampleOffsets[5] = {FfxInt32x2(+0, +0), FfxInt32x2(-1, +0), FfxInt32x2(+1, +0), FfxInt32x2(+0, -1), FfxInt32x2(+0, +1)}; + +void SortSet(FFX_PARAMETER_INOUT SampleSet fSet) +{ + CompareSwap(0, 3); + CompareSwap(1, 4); + CompareSwap(0, 2); + CompareSwap(1, 3); + CompareSwap(0, 1); + CompareSwap(2, 4); + CompareSwap(1, 2); + CompareSwap(3, 4); + CompareSwap(2, 3); +} +#endif + +FfxFloat32 ComputeMinimumDifference(FfxInt32x2 iPxPos, SampleSet fSet0, SampleSet fSet1) +{ + FfxFloat32 fMinDiff = FSR3UPSCALER_FP16_MAX - 1; + FfxInt32 a = 0; + FfxInt32 b = 0; + + SortSet(fSet0); + SortSet(fSet1); + + const FfxFloat32 fMax = ffxMin(fSet0.fSamples[SHADING_CHANGE_SET_SIZE-1], fSet1.fSamples[SHADING_CHANGE_SET_SIZE-1]); + + if (fMax > FSR3UPSCALER_FP32_MIN) { + + FFX_UNROLL + for (FfxInt32 i = 0; i < SHADING_CHANGE_SET_SIZE && (fMinDiff < FSR3UPSCALER_FP16_MAX); i++) { + + FfxFloat32 fDiff = fSet0.fSamples[a] - fSet1.fSamples[b]; + + if (abs(fDiff) > FSR3UPSCALER_FP16_MIN) { + + fDiff = sign(fDiff) * (1.0f - MinDividedByMax(fSet0.fSamples[a], fSet1.fSamples[b])); + + fMinDiff = (abs(fDiff) < abs(fMinDiff)) ? fDiff : fMinDiff; + + a += FfxInt32(fSet0.fSamples[a] < fSet1.fSamples[b]); + b += FfxInt32(fSet0.fSamples[a] >= fSet1.fSamples[b]); + } + else + { + fMinDiff = FSR3UPSCALER_FP16_MAX; + } + } + } + + return fMinDiff * FfxFloat32(fMinDiff < (FSR3UPSCALER_FP16_MAX - 1)); +} + +SampleSet GetCurrentLumaBilinearSamples(FfxFloat32x2 fUv) +{ + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + const FfxInt32x2 iBasePos = FfxInt32x2(floor(fUvJittered * RenderSize())); + + SampleSet fSet; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], RenderSize()); + fSet.fSamples[iSampleIndex] = LoadCurrentLuma(iSamplePos) * Exposure(); + fSet.fSamples[iSampleIndex] = ffxPow(fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + fSet.fSamples[iSampleIndex] = ffxMax(fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + + return fSet; +} + +struct PreviousLumaBilinearSamplesData +{ + SampleSet fSet; + FfxBoolean bIsExistingSample; +}; + +PreviousLumaBilinearSamplesData GetPreviousLumaBilinearSamples(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + PreviousLumaBilinearSamplesData data = (PreviousLumaBilinearSamplesData)0; + + const FfxFloat32x2 fUvJittered = fUv + PreviousFrameJitter() / PreviousFrameRenderSize(); + const FfxFloat32x2 fReprojectedUv = fUvJittered + fMotionVector; + + data.bIsExistingSample = IsUvInside(fReprojectedUv); + + if (data.bIsExistingSample) { + + const FfxInt32x2 iBasePos = FfxInt32x2(floor(fReprojectedUv * PreviousFrameRenderSize())); + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], PreviousFrameRenderSize()); + data.fSet.fSamples[iSampleIndex] = LoadPreviousLuma(iSamplePos) * DeltaPreExposure() * Exposure(); + data.fSet.fSamples[iSampleIndex] = ffxPow(data.fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + data.fSet.fSamples[iSampleIndex] = ffxMax(data.fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + } + + return data; +} + +FfxFloat32 ComputeDiff(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 fMinDiff = 0.0f; + + const SampleSet fCurrentSamples = GetCurrentLumaBilinearSamples(fUv); + const PreviousLumaBilinearSamplesData previousData = GetPreviousLumaBilinearSamples(fUv, fMotionVector); + + if (previousData.bIsExistingSample) { + fMinDiff = ComputeMinimumDifference(iPxPos, fCurrentSamples, previousData.fSet); + } + + return fMinDiff; +} + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) +{ + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxSamplePos); + const FfxFloat32x2 fUv = (iPxSamplePos + 0.5f) / RenderSize(); + + const FfxFloat32 fScaledAndSignedLumaDiff = ComputeDiff(iPxSamplePos, fUv, fDilatedMotionVector); + + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[DIFFERENCE] = fScaledAndSignedLumaDiff; + fOutput[SIGN_SUM] = (fScaledAndSignedLumaDiff != 0.0f) ? sign(fScaledAndSignedLumaDiff) : 0.0f; + fOutput[MIP0_INDICATOR] = 1.0f; + + return fOutput; +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index >= iShadingChangeMipStart) + { + StorePyramid(pix, outValue.xy, index); + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +#endif + +// define fetch and store functions Packed +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "../spd/ffx_spd.h" + +void ComputeShadingChangePyramid(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta new file mode 100644 index 0000000..94175f1 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: fac4b69395377df409c7677a54515cd8 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h new file mode 100644 index 0000000..2d446bb --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h @@ -0,0 +1,250 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define USE_YCOCG 1 + +#define fAutogenEpsilon 0.01f + +// EXPERIMENTAL + +FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 X = colorPreAlpha; + FfxFloat32x3 Y = colorPostAlpha; + FfxFloat32x3 Z = colorPrevPreAlpha; + FfxFloat32x3 W = colorPrevPostAlpha; + + FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); + + // cleanup very small values + retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); + + return retVal; +} + +// works ok: thin edges +FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + + FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha; + FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha; + bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); + + FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha; //prev+1*d = post => d = color, alpha = + FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; + + FfxFloat32x3 X = colorPrevPreAlpha; + FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha; + FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; + FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha; + + FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); + + FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) ); + + // only pixels that have significantly changed in color shuold be considered + retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) ); + + return retVal; +} + +// This function computes the TransparencyAndComposition mask: +// This mask indicates pixels that should discard locks and apply color clamping. +// +// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of +// the MVs can not be guaranteed (e.g. procedutal movement or vegetation that does not have MVs to reduce the cost during rasterization) +// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting. +// +// This function takes a opaque only and a final texture and uses internal copies of those textures from the last frame. +// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency. +// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels. +// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation. +// +// In the final step it stores the current textures in internal textures for the next frame + +FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx); + + // [branch] + if (retVal > FFX_MIN16_F(0.01f)) + { + retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx); + } + return retVal; +} + +float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb; + FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb; + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos) +{ + float lum[9]; + int i = 0; + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 curCol = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb); + FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb); + lum[i++] = length(curCol - prevCol); + } + } + + //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]); + //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]); + + //return sqrt(gradX * gradX + gradY * gradY); + + float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); + float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); + + return sqrt(sqrt(gradX * gradY)); +} + +FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); + FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); + FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId); + FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx); + FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx); + +#if USE_YCOCG + colorPreAlpha = RGBToYCoCg(colorPreAlpha); + colorPostAlpha = RGBToYCoCg(colorPostAlpha); + colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha); + colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha); +#endif + FfxFloat32x3 minPrev = FFX_MIN16_F3(+1000.f, +1000.f, +1000.f); + FfxFloat32x3 maxPrev = FFX_MIN16_F3(-1000.f, -1000.f, -1000.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + W = RGBToYCoCg(W); +#endif + minPrev = min(minPrev, W); + maxPrev = max(maxPrev, W); + } + } + // instead of computing the overlap: simply count how many samples are outside + // set reactive based on that + FFX_MIN16_F count = FFX_MIN16_F(0.f); + for (int y = -1; y < 2; ++y) + { + for (int x = -1; x < 2; ++x) + { + FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y)); + +#if USE_YCOCG + Y = RGBToYCoCg(Y); +#endif + count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f); + } + } + retVal = count / FFX_MIN16_F(27.f); + + return retVal; +} + + +// This function computes the Reactive mask: +// We want pixels marked where the alpha portion of the frame changes a lot between neighbours +// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...) +// As a result history would not be trustworthy. +// On the other hand we don't want pixels marked where pre-alpha has a large differnce, since those would profit from accumulation +// For mirrors we may assume the pre-alpha is pretty uniform color. +// +// This works well generally, but also marks edge pixels +FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx) +{ + // we only get here if alpha has a significant contribution and has changed since last frame. + FFX_MIN16_F retVal = FFX_MIN16_F(0.f); + + // mark pixels with huge variance in alpha as reactive + FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx)); + FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx)); + retVal = ffxSaturate(alphaEdge - opaqueEdge); + + // the above also marks edge pixels due to jitter, so we need to cancel those out + + + return retVal; +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta new file mode 100644 index 0000000..67b7d76 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 21f8219a8876f77478e60862240a41f8 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h new file mode 100644 index 0000000..2d587f0 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h @@ -0,0 +1,184 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} + +#ifndef FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE +#define FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate +#endif + +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) +{ + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} + +FfxFloat32 ComputeMaxKernelWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { + + const FfxFloat32 fKernelSizeBias = 1.0f + (1.0f / FfxFloat32x2(DownscaleFactor()) - 1.0f).x; + + return ffxMin(FfxFloat32(1.99f), fKernelSizeBias); +} + +FfxFloat32x3 LoadPreparedColor(FfxInt32x2 iSamplePos) +{ + const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iSamplePos)) * Exposure(); + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); + + return fPreparedYCoCg; +} + +void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) + const FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); + const FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); + const FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); + const FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + const FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + + FfxInt32x2 offsetTL; + offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); + offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); + + //Load samples + // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. + // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. + // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. + const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; + const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + const FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); + + const FfxBoolean bIsInitialSample = (params.fAccumulation == 0.0f); + + FfxFloat32x3 fSamples[9]; + FfxInt32 iSampleIndex = 0; + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + const FfxInt32x2 iSampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + iSampleColRow; + const FfxInt32x2 iSampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + fSamples[iSampleIndex] = LoadPreparedColor(iSampleCoord); + + ++iSampleIndex; + } + } + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + if (bIsInitialSample) + { + for (iSampleIndex = 0; iSampleIndex < 9; ++iSampleIndex) + { + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + fSamples[iSampleIndex] = RGBToYCoCg(Tonemap(YCoCgToRGB(fSamples[iSampleIndex]))); + } + } +#endif + + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight(params, data); + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + + const FfxFloat32 fKernelBiasWeight = + ffxMin(1.0f - params.fDisocclusion * 0.5f, + ffxMin(1.0f - params.fShadingChange, + ffxSaturate(data.fHistoryWeight * 5.0f) + )); + + const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMin, fKernelBiasMax, fKernelBiasWeight); + + + iSampleIndex = 0; + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) + { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) + { + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); + const FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + + if (!bIsInitialSample) + { + const FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + data.fUpsampledColor += fSamples[iSampleIndex] * fSampleWeight; + data.fUpsampledWeight += fSampleWeight; + } + + // Update rectification box + { + const FfxFloat32 fRectificationCurveBias = -2.3f; + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenFactor; + + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, data.clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + ++iSampleIndex; + } + } + + RectificationBoxComputeVarianceBoxData(data.clippingBox); + + data.fUpsampledWeight *= FfxFloat32(data.fUpsampledWeight > FSR3UPSCALER_EPSILON); + + if (data.fUpsampledWeight > FSR3UPSCALER_EPSILON) { + // Normalize for deringing (we need to compare colors) + data.fUpsampledColor = data.fUpsampledColor / data.fUpsampledWeight; + data.fUpsampledWeight *= fAverageLanczosWeightPerFrame; + + Deringing(data.clippingBox, data.fUpsampledColor); + } + + // Initial samples using tonemapped upsampling + if (bIsInitialSample) { +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + data.fUpsampledColor = RGBToYCoCg(InverseTonemap(YCoCgToRGB(data.clippingBox.boxCenter))); +#else + data.fUpsampledColor = data.clippingBox.boxCenter; +#endif + data.fUpsampledWeight = 1.0f; + data.fHistoryWeight = 0.0f; + } +} diff --git a/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta new file mode 100644 index 0000000..7dd7a51 --- /dev/null +++ b/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: 06167cdbc8ec55e4ab6913f7b5f1abf8 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/spd.meta b/Shaders/shaders/spd.meta new file mode 100644 index 0000000..c564f28 --- /dev/null +++ b/Shaders/shaders/spd.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: bd7cab5c46d4cd740ae57c03c8789ca4 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/spd/ffx_spd.h b/Shaders/shaders/spd/ffx_spd.h new file mode 100644 index 0000000..6d4f997 --- /dev/null +++ b/Shaders/shaders/spd/ffx_spd.h @@ -0,0 +1,1007 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup FfxGPUSpd FidelityFX SPD +/// FidelityFX Single Pass Downsampler 2.0 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// @param [in] mips optional: if -1, calculate based on rect width and height +/// +/// @ingroup FfxGPUSpd +#if defined(FFX_CPU) +FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo, + FfxInt32 mips) +{ + // determines the offset of the first tile to downsample based on + // left (rectInfo[0]) and top (rectInfo[1]) of the subregion. + workGroupOffset[0] = rectInfo[0] / 64; + workGroupOffset[1] = rectInfo[1] / 64; + + FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width + FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height + + // we only need to dispatch as many thread groups as tiles we need to downsample + // number of tiles per slice depends on the subregion to downsample + dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; + dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; + + // number of thread groups per slice + numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); + + if (mips >= 0) + { + numWorkGroupsAndMips[1] = FfxUInt32(mips); + } + else + { + // calculate based on rect width and height + FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]); + numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); + } +} + +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// +/// @ingroup FfxGPUSpd +FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo) +{ + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); +} +#endif // #if defined(FFX_CPU) + + +//============================================================================================================================== +// NON-PACKED VERSION +//============================================================================================================================== +#if defined(FFX_GPU) +#if defined(FFX_SPD_PACKED_ONLY) +// Avoid compiler errors by including default implementations of these callbacks. +FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +#endif // #if FFX_SPD_PACKED_ONLY + +//_____________________________________________________________/\_______________________________________________________________ + +void ffxSpdWorkgroupShuffleBarrier() +{ + FFX_GROUP_MEMORY_BARRIER; +} + +// Only last active workgroup should proceed +bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice) +{ + // global atomic counter + if (localInvocationIndex == 0) + { + SpdIncreaseAtomicCounter(slice); + } + + ffxSpdWorkgroupShuffleBarrier(); + return (SpdGetAtomicCounter() != (numWorkGroups - 1)); +} + +// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3); +FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) +{ +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) + + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat32x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4(v0, v1, v2, v3); + +#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) + + // requires SM6.0 + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = QuadReadAcrossX(v); + FfxFloat32x4 v2 = QuadReadAcrossY(v); + FfxFloat32x4 v3 = QuadReadAcrossDiagonal(v); + return SpdReduce4(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat32x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat32x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4(v0, v1, v2, v3); + */ +#endif + return v; +} + +FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y); + FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y); + FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y); + FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice) +{ +#if defined(SPD_LINEAR_SAMPLER) + return SpdLoadSourceImage(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + v[0] = SpdReduceQuad(v[0]); + v[1] = SpdReduceQuad(v[1]); + v[2] = SpdReduceQuad(v[2]); + v[3] = SpdReduceQuad(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediate(x / 2, y / 2, v[0]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + for (FfxUInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediate(x, y, v[i]); + ffxSpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + ffxSpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediate(x + 0, y + 0, v[0]); + SpdStoreIntermediate(x + 8, y + 0, v[1]); + SpdStoreIntermediate(x + 0, y + 8, v[2]); + SpdStoreIntermediate(x + 8, y + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); +#else + SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); +#endif +} + + +void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat32x4 v = SpdLoadIntermediate(x, y); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat32x4 v = + SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... + SpdStoreIntermediate(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... + // 0 x 0 0 0 x 0 0 + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediate(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v3, 6, slice); + + if (mips <= 7) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3); + SpdStore(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediate(x, y, v); +} + +void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +/// Downsamples a 64x64 tile based on the work group id. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + // compute MIP level 0 and 1 + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); + + // compute MIP level 2, 3, 4, 5 + SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips <= 6) + return; + + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + // reset the global atomic counter back to 0 for the next spd dispatch + SpdResetAtomicCounter(slice); + + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // compute MIP level 6 and 7 + SpdDownsampleMips_6_7(x, y, mips, slice); + + // compute MIP level 8, 9, 10, 11 + SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. +/// +/// @ingroup FfxGPUSpd +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//============================================================================================================================== +// PACKED VERSION +//============================================================================================================================== + +#if FFX_HALF + +FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) +{ +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat16x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4H(v0, v1, v2, v3); +#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) + // requires SM6.0 + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = QuadReadAcrossX(v); + FfxFloat16x4 v2 = QuadReadAcrossY(v); + FfxFloat16x4 v3 = QuadReadAcrossDiagonal(v); + return SpdReduce4H(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat16x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat16x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4H(v0, v1, v2, v3); + */ +#endif + return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat16x4 v0 = SpdLoadIntermediateH(i0.x, i0.y); + FfxFloat16x4 v1 = SpdLoadIntermediateH(i1.x, i1.y); + FfxFloat16x4 v2 = SpdLoadIntermediateH(i2.x, i2.y); + FfxFloat16x4 v3 = SpdLoadIntermediateH(i3.x, i3.y); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice) +{ +#if defined(SPD_LINEAR_SAMPLER) + return SpdLoadSourceImageH(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + v[0] = SpdReduceQuadH(v[0]); + v[1] = SpdReduceQuadH(v[1]); + v[2] = SpdReduceQuadH(v[2]); + v[3] = SpdReduceQuadH(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2, v[0]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + for (FfxInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediateH(x, y, v[i]); + ffxSpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + ffxSpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediateH(x + 0, y + 0, v[0]); + SpdStoreIntermediateH(x + 8, y + 0, v[1]); + SpdStoreIntermediateH(x + 0, y + 8, v[2]); + SpdStoreIntermediateH(x + 8, y + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); +#else + SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); +#endif +} + + +void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat16x4 v = SpdLoadIntermediateH(x, y); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat16x4 v = + SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... + SpdStoreIntermediateH(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... + // 0 x 0 0 0 x 0 0 + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediateH(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v3, 6, slice); + + if (mips < 8) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3); + SpdStoreH(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediateH(x, y, v); +} + +void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + ffxSpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + + // compute MIP level 0 and 1 + SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); + + // compute MIP level 2, 3, 4, 5 + SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips < 7) + return; + + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + // reset the global atomic counter back to 0 for the next spd dispatch + SpdResetAtomicCounter(slice); + + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // compute MIP level 6 and 7 + SpdDownsampleMips_6_7H(x, y, mips, slice); + + // compute MIP level 8, 9, 10, 11 + SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} + +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. +/// +/// @ingroup FfxGPUSpd +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +#endif // #if FFX_HALF +#endif // #if defined(FFX_GPU) diff --git a/Shaders/shaders/spd/ffx_spd.h.meta b/Shaders/shaders/spd/ffx_spd.h.meta new file mode 100644 index 0000000..2739093 --- /dev/null +++ b/Shaders/shaders/spd/ffx_spd.h.meta @@ -0,0 +1,60 @@ +fileFormatVersion: 2 +guid: fe8e621d12c641c4297675f3015697fc +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 1 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude WebGL: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/package.json b/package.json new file mode 100644 index 0000000..d672c3f --- /dev/null +++ b/package.json @@ -0,0 +1,12 @@ +{ + "name": "fidelityfx.fsr", + "version": "1.0.0", + "displayName": "FidelityFX FSR", + "description": "FidelityFX Super Resolution 2/3 Upscaler core assets", + "unity": "2020.1", + "documentationUrl": "https://github.com/ndepoel/FSR2Unity", + "author": { + "name": "Nico de Poel", + "email": "ndepoel@gmail.com.com" + } +} \ No newline at end of file diff --git a/package.json.meta b/package.json.meta new file mode 100644 index 0000000..c3ad2ad --- /dev/null +++ b/package.json.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 202b84e3e446cc544a82954c2235ec60 +PackageManifestImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: