From d65cc3a35de270a1011c8158810e3d56c579953f Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 29 Oct 2024 18:30:00 +0100 Subject: [PATCH] Added standalone CAS sharpening shader --- Runtime/Common/ConstantsBuffer.cs | 50 + Runtime/Common/ConstantsBuffer.cs.meta | 3 + Shaders/ffx_cas_sharpen_pass.compute | 11 + Shaders/ffx_cas_sharpen_pass.compute.meta | 8 + Shaders/shaders/cas.meta | 8 + Shaders/shaders/cas/ffx_cas.h | 1271 +++++++++++++++++ Shaders/shaders/cas/ffx_cas.h.meta | 65 + Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h | 226 +++ .../shaders/cas/ffx_cas_callbacks_hlsl.h.meta | 65 + Shaders/shaders/cas/ffx_cas_resources.h | 41 + Shaders/shaders/cas/ffx_cas_resources.h.meta | 65 + Shaders/shaders/cas/ffx_cas_sharpen.h | 89 ++ Shaders/shaders/cas/ffx_cas_sharpen.h.meta | 65 + Shaders/shaders/ffx_cas_sharpen_pass.hlsl | 54 + .../shaders/ffx_cas_sharpen_pass.hlsl.meta | 7 + 15 files changed, 2028 insertions(+) create mode 100644 Runtime/Common/ConstantsBuffer.cs create mode 100644 Runtime/Common/ConstantsBuffer.cs.meta create mode 100644 Shaders/ffx_cas_sharpen_pass.compute create mode 100644 Shaders/ffx_cas_sharpen_pass.compute.meta create mode 100644 Shaders/shaders/cas.meta create mode 100644 Shaders/shaders/cas/ffx_cas.h create mode 100644 Shaders/shaders/cas/ffx_cas.h.meta create mode 100644 Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h create mode 100644 Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h.meta create mode 100644 Shaders/shaders/cas/ffx_cas_resources.h create mode 100644 Shaders/shaders/cas/ffx_cas_resources.h.meta create mode 100644 Shaders/shaders/cas/ffx_cas_sharpen.h create mode 100644 Shaders/shaders/cas/ffx_cas_sharpen.h.meta create mode 100644 Shaders/shaders/ffx_cas_sharpen_pass.hlsl create mode 100644 Shaders/shaders/ffx_cas_sharpen_pass.hlsl.meta diff --git a/Runtime/Common/ConstantsBuffer.cs b/Runtime/Common/ConstantsBuffer.cs new file mode 100644 index 0000000..98fbe54 --- /dev/null +++ b/Runtime/Common/ConstantsBuffer.cs @@ -0,0 
using System.Runtime.InteropServices;
using UnityEngine;
using UnityEngine.Rendering;

namespace FidelityFX
{
    /// <summary>
    /// Convenience class for handling a constants buffer containing a single struct item.
    /// This wraps the compute buffer and the value array, as well as providing easy access to both.
    /// </summary>
    /// <typeparam name="TConst">
    /// The struct type stored in the buffer. Its marshalled size determines the buffer stride.
    /// </typeparam>
    public class ConstantsBuffer<TConst>
        where TConst: struct
    {
        private ComputeBuffer _computeBuffer;

        // Single-element backing array: SetBufferData takes an array, and Value hands out a ref into it.
        private readonly TConst[] _constArray = { new TConst() };

        /// <summary>
        /// Reference to the CPU-side copy of the constants. Changes only reach the GPU buffer
        /// after <see cref="UpdateBufferData"/> has been recorded.
        /// </summary>
        public ref TConst Value => ref _constArray[0];

        /// <summary>
        /// Creates a new instance and immediately allocates its compute buffer.
        /// </summary>
        /// <returns>An initialized constants buffer.</returns>
        public static ConstantsBuffer<TConst> Create()
        {
            ConstantsBuffer<TConst> buffer = new();
            buffer.Init();
            return buffer;
        }

        /// <summary>
        /// Allocates the underlying constant buffer with room for exactly one <typeparamref name="TConst"/>.
        /// </summary>
        public void Init()
        {
            // Release any buffer from an earlier Init() call so repeated initialization does not leak it.
            Destroy();

            _computeBuffer = new ComputeBuffer(1, Marshal.SizeOf<TConst>(), ComputeBufferType.Constant);
        }

        /// <summary>
        /// Records a command that copies the current <see cref="Value"/> into the compute buffer.
        /// </summary>
        /// <param name="commandBuffer">The command buffer to record the upload into.</param>
        public void UpdateBufferData(CommandBuffer commandBuffer)
        {
            commandBuffer.SetBufferData(_computeBuffer, _constArray);
        }

        /// <summary>
        /// Releases the compute buffer, if one is allocated. Safe to call more than once.
        /// </summary>
        public void Destroy()
        {
            if (_computeBuffer == null)
            {
                return;
            }

            _computeBuffer.Release();
            _computeBuffer = null;
        }

        /// <summary>
        /// Lets a <see cref="ConstantsBuffer{TConst}"/> be passed wherever a <see cref="ComputeBuffer"/> is expected.
        /// </summary>
        /// <param name="constants">The wrapper whose compute buffer is returned.</param>
        public static implicit operator ComputeBuffer(ConstantsBuffer<TConst> constants)
        {
            return constants._computeBuffer;
        }
    }
}
a/Shaders/ffx_cas_sharpen_pass.compute.meta b/Shaders/ffx_cas_sharpen_pass.compute.meta new file mode 100644 index 0000000..1369384 --- /dev/null +++ b/Shaders/ffx_cas_sharpen_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 00e3ffafadd35564780d8a12adcbeff7 +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/cas.meta b/Shaders/shaders/cas.meta new file mode 100644 index 0000000..a2a4885 --- /dev/null +++ b/Shaders/shaders/cas.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 94edab5297308bd4fae936da8ce22a37 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/cas/ffx_cas.h b/Shaders/shaders/cas/ffx_cas.h new file mode 100644 index 0000000..3ef2ad7 --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas.h @@ -0,0 +1,1271 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/// @defgroup FfxGPUCas FidelityFX CAS
/// FidelityFX Contrast Adaptive Sharpening GPU documentation
///
/// @ingroup FfxGPUEffects

/// The maximum scaling ratio that CAS can support.
///
/// @ingroup FfxGPUCas
#define FFX_CAS_AREA_LIMIT (4.0)

/// Checks whether a given input/output resolution pair is within the CAS scaling limit.
///
/// The output area divided by the input area must not exceed FFX_CAS_AREA_LIMIT.
///
/// @param [in] outX    The width of the target output, expressed in pixels.
/// @param [in] outY    The height of the target output, expressed in pixels.
/// @param [in] inX     The width of the input surface, expressed in pixels.
/// @param [in] inY     The height of the input surface, expressed in pixels.
///
/// @returns
/// 1 if CAS supports scaling in the given configuration, 0 otherwise.
///
/// @ingroup FfxGPUCas
FfxUInt32 ffxCasSupportScaling(
    FFX_PARAMETER_IN FfxFloat32 outX,
    FFX_PARAMETER_IN FfxFloat32 outY,
    FFX_PARAMETER_IN FfxFloat32 inX,
    FFX_PARAMETER_IN FfxFloat32 inY)
{
    FfxFloat32 outputArea = outX * outY;
    FfxFloat32 inputArea  = inX * inY;

    // Area ratio of output to input; compared against the supported upper bound.
    FfxFloat32 areaRatio = outputArea * ffxReciprocal(inputArea);

    return FfxUInt32(areaRatio <= FFX_CAS_AREA_LIMIT);
}
/// Fills in the two constant vectors consumed by the CAS filters (works on CPU or GPU).
///
/// @param [out] const0                 The first 4 32-bit values of the constant buffer: input/output scale in X and Y, followed by the matching sample offsets.
/// @param [out] const1                 The second 4 32-bit values of the constant buffer: sharpening peak as a 32-bit float, the same peak packed as a half, 8x the X scale, and a zero pad.
/// @param [in] sharpness               Set to 0 for the default (lower ringing), 1 for maximum (highest ringing). Values are saturated to [0, 1].
/// @param [in] inputSizeInPixelsX      The size of the input resolution in the X dimension.
/// @param [in] inputSizeInPixelsY      The size of the input resolution in the Y dimension.
/// @param [in] outputSizeInPixelsX     The size of the output resolution in the X dimension.
/// @param [in] outputSizeInPixelsY     The size of the output resolution in the Y dimension.
///
/// @ingroup FfxGPUCas
FFX_STATIC void ffxCasSetup(
    FFX_PARAMETER_INOUT FfxUInt32x4 const0,
    FFX_PARAMETER_INOUT FfxUInt32x4 const1,
    FFX_PARAMETER_IN FfxFloat32     sharpness,
    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsX,
    FFX_PARAMETER_IN FfxFloat32     inputSizeInPixelsY,
    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsX,
    FFX_PARAMETER_IN FfxFloat32     outputSizeInPixelsY)
{
    // Reciprocals of the output size, shared by every scaling term below.
    FfxFloat32 rcpOutputX = ffxReciprocal(outputSizeInPixelsX);
    FfxFloat32 rcpOutputY = ffxReciprocal(outputSizeInPixelsY);

    // Scaling terms: input/output ratio, then the half-ratio minus half-pixel offset.
    const0[0] = ffxAsUInt32(inputSizeInPixelsX * rcpOutputX);
    const0[1] = ffxAsUInt32(inputSizeInPixelsY * rcpOutputY);
    const0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputSizeInPixelsX * rcpOutputX - FfxFloat32(0.5));
    const0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputSizeInPixelsY * rcpOutputY - FfxFloat32(0.5));

    // Sharpness value: negative reciprocal of a divisor interpolated from 8 (sharpness 0) to 5 (sharpness 1).
    FfxFloat32   peak       = -ffxReciprocal(ffxLerp(8.0, 5.0, ffxSaturate(sharpness)));
    FfxFloat32x2 packedPeak = {peak, 0.0};

    const1[0] = ffxAsUInt32(peak);
    const1[1] = ffxPackHalf2x16(packedPeak);
    const1[2] = ffxAsUInt32(FfxFloat32(8.0) * inputSizeInPixelsX * rcpOutputX);
    const1[3] = 0;
}

// NOTE: the matching #endif for FFX_GPU lies further down the file, past the filter functions.
#if defined(FFX_GPU)
#if defined(FFX_CAS_PACKED_ONLY)
// Avoid compiler errors by including default implementations of these callbacks.
// Stub load callback: always returns black.
FfxFloat32x3 casLoad(FFX_PARAMETER_IN FfxInt32x2 position)
{
    return FfxFloat32x3(0.0, 0.0, 0.0);
}

// Stub input-transform callback: leaves the colour untouched.
void casInput(
    FFX_PARAMETER_INOUT FfxFloat32 red,
    FFX_PARAMETER_INOUT FfxFloat32 green,
    FFX_PARAMETER_INOUT FfxFloat32 blue)
{
}
#endif  // #if defined(FFX_CAS_PACKED_ONLY)
// No scaling algorithm uses minimal 3x3 pixel neighborhood.
//
// Sharpens a single pixel at full resolution.
//
// outPixelRed/Green/Blue  Receive the sharpened colour, saturated to [0, 1].
// samplePosition          Integer position of the pixel to filter; neighbours are fetched through casLoad().
// const0                  Scaling constants from ffxCasSetup(); not read by this variant.
// const1                  Sharpening constants from ffxCasSetup(); only const1.x (the 32-bit float peak) is read.
void casFilterNoScaling(
    FFX_PARAMETER_OUT FfxFloat32 outPixelRed,
    FFX_PARAMETER_OUT FfxFloat32 outPixelGreen,
    FFX_PARAMETER_OUT FfxFloat32 outPixelBlue,
    FFX_PARAMETER_IN FfxUInt32x2 samplePosition,
    FFX_PARAMETER_IN FfxUInt32x4 const0,
    FFX_PARAMETER_IN FfxUInt32x4 const1)
{
    // Load a collection of samples in a 3x3 neighborhood, where e is the current pixel.
    //  a b c
    //  d e f
    //  g h i
    FfxFloat32x3 sampleA = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(-1, -1));
    FfxFloat32x3 sampleB = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(0, -1));
    FfxFloat32x3 sampleC = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(1, -1));
    FfxFloat32x3 sampleD = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(-1, 0));
    FfxFloat32x3 sampleE = casLoad(FfxInt32x2(samplePosition));
    FfxFloat32x3 sampleF = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(1, 0));
    FfxFloat32x3 sampleG = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(-1, 1));
    FfxFloat32x3 sampleH = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(0, 1));
    FfxFloat32x3 sampleI = casLoad(FfxInt32x2(samplePosition) + FfxInt32x2(1, 1));

    // Run optional input transform.
    casInput(sampleA.r, sampleA.g, sampleA.b);
    casInput(sampleB.r, sampleB.g, sampleB.b);
    casInput(sampleC.r, sampleC.g, sampleC.b);
    casInput(sampleD.r, sampleD.g, sampleD.b);
    casInput(sampleE.r, sampleE.g, sampleE.b);
    casInput(sampleF.r, sampleF.g, sampleF.b);
    casInput(sampleG.r, sampleG.g, sampleG.b);
    casInput(sampleH.r, sampleH.g, sampleH.b);
    casInput(sampleI.r, sampleI.g, sampleI.b);

    // Soft min and max.
    //  a b c             b
    //  d e f * 0.5  +  d e f * 0.5
    //  g h i             h
    // With FFX_CAS_BETTER_DIAGONALS the cross and full 3x3 results are summed, so
    // these are 2.0x bigger (factored out the extra multiply).
    FfxFloat32 minimumRed   = ffxMin3(ffxMin3(sampleD.r, sampleE.r, sampleF.r), sampleB.r, sampleH.r);
    FfxFloat32 minimumGreen = ffxMin3(ffxMin3(sampleD.g, sampleE.g, sampleF.g), sampleB.g, sampleH.g);
    FfxFloat32 minimumBlue  = ffxMin3(ffxMin3(sampleD.b, sampleE.b, sampleF.b), sampleB.b, sampleH.b);

#if defined(FFX_CAS_BETTER_DIAGONALS)
    FfxFloat32 minimumRed2   = ffxMin3(ffxMin3(minimumRed, sampleA.r, sampleC.r), sampleG.r, sampleI.r);
    FfxFloat32 minimumGreen2 = ffxMin3(ffxMin3(minimumGreen, sampleA.g, sampleC.g), sampleG.g, sampleI.g);
    FfxFloat32 minimumBlue2  = ffxMin3(ffxMin3(minimumBlue, sampleA.b, sampleC.b), sampleG.b, sampleI.b);
    minimumRed               = minimumRed + minimumRed2;
    minimumGreen             = minimumGreen + minimumGreen2;
    minimumBlue              = minimumBlue + minimumBlue2;
#endif  // #if defined(FFX_CAS_BETTER_DIAGONALS)

    FfxFloat32 maximumRed   = ffxMax3(ffxMax3(sampleD.r, sampleE.r, sampleF.r), sampleB.r, sampleH.r);
    FfxFloat32 maximumGreen = ffxMax3(ffxMax3(sampleD.g, sampleE.g, sampleF.g), sampleB.g, sampleH.g);
    FfxFloat32 maximumBlue  = ffxMax3(ffxMax3(sampleD.b, sampleE.b, sampleF.b), sampleB.b, sampleH.b);

#if defined(FFX_CAS_BETTER_DIAGONALS)
    FfxFloat32 maximumRed2   = ffxMax3(ffxMax3(maximumRed, sampleA.r, sampleC.r), sampleG.r, sampleI.r);
    FfxFloat32 maximumGreen2 = ffxMax3(ffxMax3(maximumGreen, sampleA.g, sampleC.g), sampleG.g, sampleI.g);
    FfxFloat32 maximumBlue2  = ffxMax3(ffxMax3(maximumBlue, sampleA.b, sampleC.b), sampleG.b, sampleI.b);
    maximumRed               = maximumRed + maximumRed2;
    maximumGreen             = maximumGreen + maximumGreen2;
    maximumBlue              = maximumBlue + maximumBlue2;
#endif  // #if defined(FFX_CAS_BETTER_DIAGONALS)

    // Smooth minimum distance to signal limit divided by smooth max.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    FfxFloat32 reciprocalMaximumRed   = ffxReciprocal(maximumRed);
    FfxFloat32 reciprocalMaximumGreen = ffxReciprocal(maximumGreen);
    FfxFloat32 reciprocalMaximumBlue  = ffxReciprocal(maximumBlue);
#else
    FfxFloat32 reciprocalMaximumRed   = ffxApproximateReciprocal(maximumRed);
    FfxFloat32 reciprocalMaximumGreen = ffxApproximateReciprocal(maximumGreen);
    FfxFloat32 reciprocalMaximumBlue  = ffxApproximateReciprocal(maximumBlue);
#endif  // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // The signal limit must match the scale of the min/max values: they are doubled only
    // when FFX_CAS_BETTER_DIAGONALS sums two results, so that macro (not the math-precision
    // switch) selects the 2.0 limit. This matches casFilterNoScalingHalf and casFilterWithScaling.
#if defined(FFX_CAS_BETTER_DIAGONALS)
    FfxFloat32 amplifyRed   = ffxSaturate(ffxMin(minimumRed, FfxFloat32(2.0) - maximumRed) * reciprocalMaximumRed);
    FfxFloat32 amplifyGreen = ffxSaturate(ffxMin(minimumGreen, FfxFloat32(2.0) - maximumGreen) * reciprocalMaximumGreen);
    FfxFloat32 amplifyBlue  = ffxSaturate(ffxMin(minimumBlue, FfxFloat32(2.0) - maximumBlue) * reciprocalMaximumBlue);
#else
    FfxFloat32 amplifyRed   = ffxSaturate(ffxMin(minimumRed, FfxFloat32(1.0) - maximumRed) * reciprocalMaximumRed);
    FfxFloat32 amplifyGreen = ffxSaturate(ffxMin(minimumGreen, FfxFloat32(1.0) - maximumGreen) * reciprocalMaximumGreen);
    FfxFloat32 amplifyBlue  = ffxSaturate(ffxMin(minimumBlue, FfxFloat32(1.0) - maximumBlue) * reciprocalMaximumBlue);
#endif  // #if defined(FFX_CAS_BETTER_DIAGONALS)

    // Shaping amount of sharpening.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    amplifyRed   = ffxSqrt(amplifyRed);
    amplifyGreen = ffxSqrt(amplifyGreen);
    amplifyBlue  = ffxSqrt(amplifyBlue);
#else
    amplifyRed   = ffxApproximateSqrt(amplifyRed);
    amplifyGreen = ffxApproximateSqrt(amplifyGreen);
    amplifyBlue  = ffxApproximateSqrt(amplifyBlue);
#endif  // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // Filter shape.
    //  0 w 0
    //  w 1 w
    //  0 w 0
    // const1.x carries the (negative) peak weight written by ffxCasSetup().
    FfxFloat32   peak   = ffxAsFloat(const1.x);
    FfxFloat32x3 weight = FfxFloat32x3(amplifyRed * peak, amplifyGreen * peak, amplifyBlue * peak);

    // Filter using green coef only, depending on dead code removal to strip out the extra overhead.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    FfxFloat32 reciprocalWeight = ffxReciprocal(FfxFloat32(1.0) + FfxFloat32(4.0) * weight.g);
#else
    FfxFloat32 reciprocalWeight = ffxApproximateReciprocalMedium(FfxFloat32(1.0) + FfxFloat32(4.0) * weight.g);
#endif  // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // Weighted sum of the cross neighbours plus the centre, normalised by (1 + 4w).
    outPixelRed   = ffxSaturate((sampleB.r * weight.g + sampleD.r * weight.g + sampleF.r * weight.g + sampleH.r * weight.g + sampleE.r) * reciprocalWeight);
    outPixelGreen = ffxSaturate((sampleB.g * weight.g + sampleD.g * weight.g + sampleF.g * weight.g + sampleH.g * weight.g + sampleE.g) * reciprocalWeight);
    outPixelBlue  = ffxSaturate((sampleB.b * weight.g + sampleD.b * weight.g + sampleF.b * weight.g + sampleH.b * weight.g + sampleE.b) * reciprocalWeight);
}
#if FFX_HALF == 1
// Half precision version algorithm with no scaling and filters 2 tiles in one run.
//
// Each FfxFloat16x2 carries one channel for two pixels: .x is the pixel at samplePosition,
// .y is the pixel 8 texels to the right of it.
//
// outPixelRed/Green/Blue  Receive the sharpened channel values for both pixels, saturated.
// samplePosition          Position of the first pixel; neighbours are fetched through casLoadHalf().
// const0                  Not read by this variant.
// const1                  Only const1.y is read: the sharpening peak packed as a half by ffxCasSetup().
void casFilterNoScalingHalf(
    FFX_PARAMETER_OUT FfxFloat16x2 outPixelRed,
    FFX_PARAMETER_OUT FfxFloat16x2 outPixelGreen,
    FFX_PARAMETER_OUT FfxFloat16x2 outPixelBlue,
    FFX_PARAMETER_IN FfxUInt32x2 samplePosition,
    FFX_PARAMETER_IN FfxUInt32x4 const0,
    FFX_PARAMETER_IN FfxUInt32x4 const1)
{
    // 3x3 neighborhood of the first pixel (a..i, e is the centre).
    FfxInt16x2 samplePosition0 = FfxInt16x2(samplePosition);
    FfxFloat16x3 sampleA0 = casLoadHalf(samplePosition0 + FfxInt16x2(-1, -1));
    FfxFloat16x3 sampleB0 = casLoadHalf(samplePosition0 + FfxInt16x2(0, -1));
    FfxFloat16x3 sampleC0 = casLoadHalf(samplePosition0 + FfxInt16x2(1, -1));
    FfxFloat16x3 sampleD0 = casLoadHalf(samplePosition0 + FfxInt16x2(-1, 0));
    FfxFloat16x3 sampleE0 = casLoadHalf(samplePosition0);
    FfxFloat16x3 sampleF0 = casLoadHalf(samplePosition0 + FfxInt16x2(1, 0));
    FfxFloat16x3 sampleG0 = casLoadHalf(samplePosition0 + FfxInt16x2(-1, 1));
    FfxFloat16x3 sampleH0 = casLoadHalf(samplePosition0 + FfxInt16x2(0, 1));
    FfxFloat16x3 sampleI0 = casLoadHalf(samplePosition0 + FfxInt16x2(1, 1));
    // 3x3 neighborhood of the second pixel, offset by 8 in x.
    FfxInt16x2 samplePosition1 = samplePosition0 + FfxInt16x2(8, 0);
    FfxFloat16x3 sampleA1 = casLoadHalf(samplePosition1 + FfxInt16x2(-1, -1));
    FfxFloat16x3 sampleB1 = casLoadHalf(samplePosition1 + FfxInt16x2(0, -1));
    FfxFloat16x3 sampleC1 = casLoadHalf(samplePosition1 + FfxInt16x2(1, -1));
    FfxFloat16x3 sampleD1 = casLoadHalf(samplePosition1 + FfxInt16x2(-1, 0));
    FfxFloat16x3 sampleE1 = casLoadHalf(samplePosition1);
    FfxFloat16x3 sampleF1 = casLoadHalf(samplePosition1 + FfxInt16x2(1, 0));
    FfxFloat16x3 sampleG1 = casLoadHalf(samplePosition1 + FfxInt16x2(-1, 1));
    FfxFloat16x3 sampleH1 = casLoadHalf(samplePosition1 + FfxInt16x2(0, 1));
    FfxFloat16x3 sampleI1 = casLoadHalf(samplePosition1 + FfxInt16x2(1, 1));

    // AOS to SOA conversion.
    // Pair up the same channel of the same tap from both pixels into one FfxFloat16x2.
    FfxFloat16x2 aR = FfxFloat16x2(sampleA0.r, sampleA1.r);
    FfxFloat16x2 aG = FfxFloat16x2(sampleA0.g, sampleA1.g);
    FfxFloat16x2 aB = FfxFloat16x2(sampleA0.b, sampleA1.b);
    FfxFloat16x2 bR = FfxFloat16x2(sampleB0.r, sampleB1.r);
    FfxFloat16x2 bG = FfxFloat16x2(sampleB0.g, sampleB1.g);
    FfxFloat16x2 bB = FfxFloat16x2(sampleB0.b, sampleB1.b);
    FfxFloat16x2 cR = FfxFloat16x2(sampleC0.r, sampleC1.r);
    FfxFloat16x2 cG = FfxFloat16x2(sampleC0.g, sampleC1.g);
    FfxFloat16x2 cB = FfxFloat16x2(sampleC0.b, sampleC1.b);
    FfxFloat16x2 dR = FfxFloat16x2(sampleD0.r, sampleD1.r);
    FfxFloat16x2 dG = FfxFloat16x2(sampleD0.g, sampleD1.g);
    FfxFloat16x2 dB = FfxFloat16x2(sampleD0.b, sampleD1.b);
    FfxFloat16x2 eR = FfxFloat16x2(sampleE0.r, sampleE1.r);
    FfxFloat16x2 eG = FfxFloat16x2(sampleE0.g, sampleE1.g);
    FfxFloat16x2 eB = FfxFloat16x2(sampleE0.b, sampleE1.b);
    FfxFloat16x2 fR = FfxFloat16x2(sampleF0.r, sampleF1.r);
    FfxFloat16x2 fG = FfxFloat16x2(sampleF0.g, sampleF1.g);
    FfxFloat16x2 fB = FfxFloat16x2(sampleF0.b, sampleF1.b);
    FfxFloat16x2 gR = FfxFloat16x2(sampleG0.r, sampleG1.r);
    FfxFloat16x2 gG = FfxFloat16x2(sampleG0.g, sampleG1.g);
    FfxFloat16x2 gB = FfxFloat16x2(sampleG0.b, sampleG1.b);
    FfxFloat16x2 hR = FfxFloat16x2(sampleH0.r, sampleH1.r);
    FfxFloat16x2 hG = FfxFloat16x2(sampleH0.g, sampleH1.g);
    FfxFloat16x2 hB = FfxFloat16x2(sampleH0.b, sampleH1.b);
    FfxFloat16x2 iR = FfxFloat16x2(sampleI0.r, sampleI1.r);
    FfxFloat16x2 iG = FfxFloat16x2(sampleI0.g, sampleI1.g);
    FfxFloat16x2 iB = FfxFloat16x2(sampleI0.b, sampleI1.b);

    // Run optional input transform.
    casInputHalf(aR, aG, aB);
    casInputHalf(bR, bG, bB);
    casInputHalf(cR, cG, cB);
    casInputHalf(dR, dG, dB);
    casInputHalf(eR, eG, eB);
    casInputHalf(fR, fG, fB);
    casInputHalf(gR, gG, gB);
    casInputHalf(hR, hG, hB);
    casInputHalf(iR, iG, iB);

    // Soft min and max.
    // Minimum over the cross-shaped taps (b, d, e, f, h).
    FfxFloat16x2 minimumRed = ffxMin(ffxMin(fR, hR), ffxMin(ffxMin(bR, dR), eR));
    FfxFloat16x2 minimumGreen = ffxMin(ffxMin(fG, hG), ffxMin(ffxMin(bG, dG), eG));
    FfxFloat16x2 minimumBlue = ffxMin(ffxMin(fB, hB), ffxMin(ffxMin(bB, dB), eB));

#if defined(FFX_CAS_BETTER_DIAGONALS)
    // Add the minimum over the full 3x3, so the result is 2x the scale of a single minimum.
    FfxFloat16x2 minimumRed2 = ffxMin(ffxMin(gR, iR), ffxMin(ffxMin(aR, cR), minimumRed));
    FfxFloat16x2 minimumGreen2 = ffxMin(ffxMin(gG, iG), ffxMin(ffxMin(aG, cG), minimumGreen));
    FfxFloat16x2 minimumBlue2 = ffxMin(ffxMin(gB, iB), ffxMin(ffxMin(aB, cB), minimumBlue));
    minimumRed = minimumRed + minimumRed2;
    minimumGreen = minimumGreen + minimumGreen2;
    minimumBlue = minimumBlue + minimumBlue2;
#endif // #if defined(FFX_CAS_BETTER_DIAGONALS)

    // Maximum over the cross-shaped taps (b, d, e, f, h).
    FfxFloat16x2 maximumRed = max(max(fR, hR), max(max(bR, dR), eR));
    FfxFloat16x2 maximumGreen = max(max(fG, hG), max(max(bG, dG), eG));
    FfxFloat16x2 maximumBlue = max(max(fB, hB), max(max(bB, dB), eB));

#if defined(FFX_CAS_BETTER_DIAGONALS)
    // Add the maximum over the full 3x3, mirroring the minimum above.
    FfxFloat16x2 maximumRed2 = max(max(gR, iR), max(max(aR, cR), maximumRed));
    FfxFloat16x2 maximumGreen2 = max(max(gG, iG), max(max(aG, cG), maximumGreen));
    FfxFloat16x2 maximumBlue2 = max(max(gB, iB), max(max(aB, cB), maximumBlue));
    maximumRed = maximumRed + maximumRed2;
    maximumGreen = maximumGreen + maximumGreen2;
    maximumBlue = maximumBlue + maximumBlue2;
#endif // #if defined(FFX_CAS_BETTER_DIAGONALS)

    // Smooth minimum distance to signal limit divided by smooth max.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    FfxFloat16x2 reciprocalMaximumRed = ffxReciprocalHalf(maximumRed);
    FfxFloat16x2 reciprocalMaximumGreen = ffxReciprocalHalf(maximumGreen);
    FfxFloat16x2 reciprocalMaximumBlue = ffxReciprocalHalf(maximumBlue);
#else
    FfxFloat16x2 reciprocalMaximumRed = ffxApproximateReciprocalHalf(maximumRed);
    FfxFloat16x2 reciprocalMaximumGreen = ffxApproximateReciprocalHalf(maximumGreen);
    FfxFloat16x2 reciprocalMaximumBlue = ffxApproximateReciprocalHalf(maximumBlue);
#endif // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // The limit is 2.0 when the min/max values were doubled above, 1.0 otherwise.
#if defined(FFX_CAS_BETTER_DIAGONALS)
    FfxFloat16x2 amplifyRed = ffxSaturate(min(minimumRed, FFX_BROADCAST_FLOAT16X2(2.0) - maximumRed) * reciprocalMaximumRed);
    FfxFloat16x2 amplifyGreen = ffxSaturate(min(minimumGreen, FFX_BROADCAST_FLOAT16X2(2.0) - maximumGreen) * reciprocalMaximumGreen);
    FfxFloat16x2 amplifyBlue = ffxSaturate(min(minimumBlue, FFX_BROADCAST_FLOAT16X2(2.0) - maximumBlue) * reciprocalMaximumBlue);
#else
    FfxFloat16x2 amplifyRed = ffxSaturate(min(minimumRed, FFX_BROADCAST_FLOAT16X2(1.0) - maximumRed) * reciprocalMaximumRed);
    FfxFloat16x2 amplifyGreen = ffxSaturate(min(minimumGreen, FFX_BROADCAST_FLOAT16X2(1.0) - maximumGreen) * reciprocalMaximumGreen);
    FfxFloat16x2 amplifyBlue = ffxSaturate(min(minimumBlue, FFX_BROADCAST_FLOAT16X2(1.0) - maximumBlue) * reciprocalMaximumBlue);
#endif // #if defined(FFX_CAS_BETTER_DIAGONALS)

    // Shaping amount of sharpening.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    amplifyRed = ffxSqrt(amplifyRed);
    amplifyGreen = ffxSqrt(amplifyGreen);
    amplifyBlue = ffxSqrt(amplifyBlue);
#else
    amplifyRed = ffxApproximateSqrtHalf(amplifyRed);
    amplifyGreen = ffxApproximateSqrtHalf(amplifyGreen);
    amplifyBlue = ffxApproximateSqrtHalf(amplifyBlue);
#endif // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // Filter shape.
    // const1.y holds the peak packed as a half pair; the .x component is the peak itself.
    FfxFloat16 peak = FFX_UINT32_TO_FLOAT16X2(const1.y).x;
    // Only weightGreen is used by the filter below; the red and blue weights are computed but not read.
    FfxFloat16x2 weightRed = amplifyRed * FFX_BROADCAST_FLOAT16X2(peak);
    FfxFloat16x2 weightGreen = amplifyGreen * FFX_BROADCAST_FLOAT16X2(peak);
    FfxFloat16x2 weightBlue = amplifyBlue * FFX_BROADCAST_FLOAT16X2(peak);
    // Filter.
#if defined(FFX_CAS_USE_PRECISE_MATH)
    FfxFloat16x2 reciprocalWeight = ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(1.0) + FFX_BROADCAST_FLOAT16X2(4.0) * weightGreen);
#else
    FfxFloat16x2 reciprocalWeight = ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(1.0) + FFX_BROADCAST_FLOAT16X2(4.0) * weightGreen);
#endif // #if defined(FFX_CAS_USE_PRECISE_MATH)

    // Weighted sum of the four cross neighbours plus the centre, normalised by (1 + 4w).
    outPixelRed = ffxSaturate((bR * weightGreen + dR * weightGreen + fR * weightGreen + hR * weightGreen + eR) * reciprocalWeight);
    outPixelGreen = ffxSaturate((bG * weightGreen + dG * weightGreen + fG * weightGreen + hG * weightGreen + eG) * reciprocalWeight);
    outPixelBlue = ffxSaturate((bB * weightGreen + dB * weightGreen + fB * weightGreen + hB * weightGreen + eB) * reciprocalWeight);
}
#endif // #if FFX_HALF == 1
| + // | j..|..k | + // | | | + // +-----+-----+ + FfxFloat32x2 pixelPosition = FfxFloat32x2(samplePosition) * ffxAsFloat(const0.xy) + ffxAsFloat(const0.zw); + FfxFloat32x2 floorPixelPosition = floor(pixelPosition); + pixelPosition -= floorPixelPosition; + FfxInt32x2 finalSamplePosition = FfxInt32x2(floorPixelPosition); + FfxFloat32x3 a = casLoad(finalSamplePosition + FfxInt32x2(-1, -1)); + FfxFloat32x3 b = casLoad(finalSamplePosition + FfxInt32x2(0, -1)); + FfxFloat32x3 e = casLoad(finalSamplePosition + FfxInt32x2(-1, 0)); + FfxFloat32x3 f = casLoad(finalSamplePosition); + FfxFloat32x3 c = casLoad(finalSamplePosition + FfxInt32x2(1, -1)); + FfxFloat32x3 d = casLoad(finalSamplePosition + FfxInt32x2(2, -1)); + FfxFloat32x3 g = casLoad(finalSamplePosition + FfxInt32x2(1, 0)); + FfxFloat32x3 h = casLoad(finalSamplePosition + FfxInt32x2(2, 0)); + FfxFloat32x3 i = casLoad(finalSamplePosition + FfxInt32x2(-1, 1)); + FfxFloat32x3 j = casLoad(finalSamplePosition + FfxInt32x2(0, 1)); + FfxFloat32x3 m = casLoad(finalSamplePosition + FfxInt32x2(-1, 2)); + FfxFloat32x3 n = casLoad(finalSamplePosition + FfxInt32x2(0, 2)); + FfxFloat32x3 k = casLoad(finalSamplePosition + FfxInt32x2(1, 1)); + FfxFloat32x3 l = casLoad(finalSamplePosition + FfxInt32x2(2, 1)); + FfxFloat32x3 o = casLoad(finalSamplePosition + FfxInt32x2(1, 2)); + FfxFloat32x3 p = casLoad(finalSamplePosition + FfxInt32x2(2, 2)); + + // Run optional input transform. + casInput(a.r, a.g, a.b); + casInput(b.r, b.g, b.b); + casInput(c.r, c.g, c.b); + casInput(d.r, d.g, d.b); + casInput(e.r, e.g, e.b); + casInput(f.r, f.g, f.b); + casInput(g.r, g.g, g.b); + casInput(h.r, h.g, h.b); + casInput(i.r, i.g, i.b); + casInput(j.r, j.g, j.b); + casInput(k.r, k.g, k.b); + casInput(l.r, l.g, l.b); + casInput(m.r, m.g, m.b); + casInput(n.r, n.g, n.b); + casInput(o.r, o.g, o.b); + casInput(p.r, p.g, p.b); + + // Soft min and max. + // These are 2.0x bigger (factored out the extra multiply). 
+ // a b c b + // e f g * 0.5 + e f g * 0.5 [F] + // i j k j + FfxFloat32 minimumRed = ffxMin3(ffxMin3(b.r, e.r, f.r), g.r, j.r); + FfxFloat32 minimumGreen = ffxMin3(ffxMin3(b.g, e.g, f.g), g.g, j.g); + FfxFloat32 minimumBlue = ffxMin3(ffxMin3(b.b, e.b, f.b), g.b, j.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mnfR2 = ffxMin3(ffxMin3(minimumRed, a.r, c.r), i.r, k.r); + FfxFloat32 mnfG2 = ffxMin3(ffxMin3(minimumGreen, a.g, c.g), i.g, k.g); + FfxFloat32 mnfB2 = ffxMin3(ffxMin3(minimumBlue, a.b, c.b), i.b, k.b); + minimumRed = minimumRed + mnfR2; + minimumGreen = minimumGreen + mnfG2; + minimumBlue = minimumBlue + mnfB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + FfxFloat32 mxfR = ffxMax3(ffxMax3(b.r, e.r, f.r), g.r, j.r); + FfxFloat32 mxfG = ffxMax3(ffxMax3(b.g, e.g, f.g), g.g, j.g); + FfxFloat32 mxfB = ffxMax3(ffxMax3(b.b, e.b, f.b), g.b, j.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mxfR2 = ffxMax3(ffxMax3(mxfR, a.r, c.r), i.r, k.r); + FfxFloat32 mxfG2 = ffxMax3(ffxMax3(mxfG, a.g, c.g), i.g, k.g); + FfxFloat32 mxfB2 = ffxMax3(ffxMax3(mxfB, a.b, c.b), i.b, k.b); + mxfR = mxfR + mxfR2; + mxfG = mxfG + mxfG2; + mxfB = mxfB + mxfB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + // b c d c + // f g h * 0.5 + f g h * 0.5 [G] + // j k l k + FfxFloat32 mngR = ffxMin3(ffxMin3(c.r, f.r, g.r), h.r, k.r); + FfxFloat32 mngG = ffxMin3(ffxMin3(c.g, f.g, g.g), h.g, k.g); + FfxFloat32 mngB = ffxMin3(ffxMin3(c.b, f.b, g.b), h.b, k.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mngR2 = ffxMin3(ffxMin3(mngR, b.r, d.r), j.r, l.r); + FfxFloat32 mngG2 = ffxMin3(ffxMin3(mngG, b.g, d.g), j.g, l.g); + FfxFloat32 mngB2 = ffxMin3(ffxMin3(mngB, b.b, d.b), j.b, l.b); + mngR = mngR + mngR2; + mngG = mngG + mngG2; + mngB = mngB + mngB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + FfxFloat32 mxgR = ffxMax3(ffxMax3(c.r, f.r, g.r), h.r, k.r); + FfxFloat32 mxgG = ffxMax3(ffxMax3(c.g, f.g, g.g), h.g, k.g); + FfxFloat32 mxgB = 
ffxMax3(ffxMax3(c.b, f.b, g.b), h.b, k.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mxgR2 = ffxMax3(ffxMax3(mxgR, b.r, d.r), j.r, l.r); + FfxFloat32 mxgG2 = ffxMax3(ffxMax3(mxgG, b.g, d.g), j.g, l.g); + FfxFloat32 mxgB2 = ffxMax3(ffxMax3(mxgB, b.b, d.b), j.b, l.b); + mxgR = mxgR + mxgR2; + mxgG = mxgG + mxgG2; + mxgB = mxgB + mxgB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + // e f g f + // i j k * 0.5 + i j k * 0.5 [J] + // m n o n + FfxFloat32 mnjR = ffxMin3(ffxMin3(f.r, i.r, j.r), k.r, n.r); + FfxFloat32 mnjG = ffxMin3(ffxMin3(f.g, i.g, j.g), k.g, n.g); + FfxFloat32 mnjB = ffxMin3(ffxMin3(f.b, i.b, j.b), k.b, n.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mnjR2 = ffxMin3(ffxMin3(mnjR, e.r, g.r), m.r, o.r); + FfxFloat32 mnjG2 = ffxMin3(ffxMin3(mnjG, e.g, g.g), m.g, o.g); + FfxFloat32 mnjB2 = ffxMin3(ffxMin3(mnjB, e.b, g.b), m.b, o.b); + mnjR = mnjR + mnjR2; + mnjG = mnjG + mnjG2; + mnjB = mnjB + mnjB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + FfxFloat32 mxjR = ffxMax3(ffxMax3(f.r, i.r, j.r), k.r, n.r); + FfxFloat32 mxjG = ffxMax3(ffxMax3(f.g, i.g, j.g), k.g, n.g); + FfxFloat32 mxjB = ffxMax3(ffxMax3(f.b, i.b, j.b), k.b, n.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mxjR2 = ffxMax3(ffxMax3(mxjR, e.r, g.r), m.r, o.r); + FfxFloat32 mxjG2 = ffxMax3(ffxMax3(mxjG, e.g, g.g), m.g, o.g); + FfxFloat32 mxjB2 = ffxMax3(ffxMax3(mxjB, e.b, g.b), m.b, o.b); + mxjR = mxjR + mxjR2; + mxjG = mxjG + mxjG2; + mxjB = mxjB + mxjB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + // f g h g + // j k l * 0.5 + j k l * 0.5 [K] + // n o p o + FfxFloat32 mnkR = ffxMin3(ffxMin3(g.r, j.r, k.r), l.r, o.r); + FfxFloat32 mnkG = ffxMin3(ffxMin3(g.g, j.g, k.g), l.g, o.g); + FfxFloat32 mnkB = ffxMin3(ffxMin3(g.b, j.b, k.b), l.b, o.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mnkR2 = ffxMin3(ffxMin3(mnkR, f.r, h.r), n.r, p.r); + FfxFloat32 mnkG2 = ffxMin3(ffxMin3(mnkG, f.g, h.g), n.g, p.g); + FfxFloat32 mnkB2 = 
ffxMin3(ffxMin3(mnkB, f.b, h.b), n.b, p.b); + mnkR = mnkR + mnkR2; + mnkG = mnkG + mnkG2; + mnkB = mnkB + mnkB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + + FfxFloat32 mxkR = ffxMax3(ffxMax3(g.r, j.r, k.r), l.r, o.r); + FfxFloat32 mxkG = ffxMax3(ffxMax3(g.g, j.g, k.g), l.g, o.g); + FfxFloat32 mxkB = ffxMax3(ffxMax3(g.b, j.b, k.b), l.b, o.b); + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 mxkR2 = ffxMax3(ffxMax3(mxkR, f.r, h.r), n.r, p.r); + FfxFloat32 mxkG2 = ffxMax3(ffxMax3(mxkG, f.g, h.g), n.g, p.g); + FfxFloat32 mxkB2 = ffxMax3(ffxMax3(mxkB, f.b, h.b), n.b, p.b); + mxkR = mxkR + mxkR2; + mxkG = mxkG + mxkG2; + mxkB = mxkB + mxkB2; +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + +#if defined(FFX_CAS_USE_PRECISE_MATH) + // Smooth minimum distance to signal limit divided by smooth max. + FfxFloat32 rcpMfR = ffxReciprocal(mxfR); + FfxFloat32 rcpMfG = ffxReciprocal(mxfG); + FfxFloat32 rcpMfB = ffxReciprocal(mxfB); + FfxFloat32 rcpMgR = ffxReciprocal(mxgR); + FfxFloat32 rcpMgG = ffxReciprocal(mxgG); + FfxFloat32 rcpMgB = ffxReciprocal(mxgB); + FfxFloat32 rcpMjR = ffxReciprocal(mxjR); + FfxFloat32 rcpMjG = ffxReciprocal(mxjG); + FfxFloat32 rcpMjB = ffxReciprocal(mxjB); + FfxFloat32 rcpMkR = ffxReciprocal(mxkR); + FfxFloat32 rcpMkG = ffxReciprocal(mxkG); + FfxFloat32 rcpMkB = ffxReciprocal(mxkB); +#else + // Smooth minimum distance to signal limit divided by smooth max. 
+ FfxFloat32 rcpMfR = ffxApproximateReciprocal(mxfR); + FfxFloat32 rcpMfG = ffxApproximateReciprocal(mxfG); + FfxFloat32 rcpMfB = ffxApproximateReciprocal(mxfB); + FfxFloat32 rcpMgR = ffxApproximateReciprocal(mxgR); + FfxFloat32 rcpMgG = ffxApproximateReciprocal(mxgG); + FfxFloat32 rcpMgB = ffxApproximateReciprocal(mxgB); + FfxFloat32 rcpMjR = ffxApproximateReciprocal(mxjR); + FfxFloat32 rcpMjG = ffxApproximateReciprocal(mxjG); + FfxFloat32 rcpMjB = ffxApproximateReciprocal(mxjB); + FfxFloat32 rcpMkR = ffxApproximateReciprocal(mxkR); + FfxFloat32 rcpMkG = ffxApproximateReciprocal(mxkG); + FfxFloat32 rcpMkB = ffxApproximateReciprocal(mxkB); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + +#if defined(FFX_CAS_BETTER_DIAGONALS) + FfxFloat32 ampfR = ffxSaturate(ffxMin(minimumRed, FfxFloat32(2.0) - mxfR) * rcpMfR); + FfxFloat32 ampfG = ffxSaturate(ffxMin(minimumGreen, FfxFloat32(2.0) - mxfG) * rcpMfG); + FfxFloat32 ampfB = ffxSaturate(ffxMin(minimumBlue, FfxFloat32(2.0) - mxfB) * rcpMfB); + FfxFloat32 ampgR = ffxSaturate(ffxMin(mngR, FfxFloat32(2.0) - mxgR) * rcpMgR); + FfxFloat32 ampgG = ffxSaturate(ffxMin(mngG, FfxFloat32(2.0) - mxgG) * rcpMgG); + FfxFloat32 ampgB = ffxSaturate(ffxMin(mngB, FfxFloat32(2.0) - mxgB) * rcpMgB); + FfxFloat32 ampjR = ffxSaturate(ffxMin(mnjR, FfxFloat32(2.0) - mxjR) * rcpMjR); + FfxFloat32 ampjG = ffxSaturate(ffxMin(mnjG, FfxFloat32(2.0) - mxjG) * rcpMjG); + FfxFloat32 ampjB = ffxSaturate(ffxMin(mnjB, FfxFloat32(2.0) - mxjB) * rcpMjB); + FfxFloat32 ampkR = ffxSaturate(ffxMin(mnkR, FfxFloat32(2.0) - mxkR) * rcpMkR); + FfxFloat32 ampkG = ffxSaturate(ffxMin(mnkG, FfxFloat32(2.0) - mxkG) * rcpMkG); + FfxFloat32 ampkB = ffxSaturate(ffxMin(mnkB, FfxFloat32(2.0) - mxkB) * rcpMkB); +#else + FfxFloat32 ampfR = ffxSaturate(ffxMin(minimumRed, FfxFloat32(1.0) - mxfR) * rcpMfR); + FfxFloat32 ampfG = ffxSaturate(ffxMin(minimumGreen, FfxFloat32(1.0) - mxfG) * rcpMfG); + FfxFloat32 ampfB = ffxSaturate(ffxMin(minimumBlue, FfxFloat32(1.0) - mxfB) * 
rcpMfB); + FfxFloat32 ampgR = ffxSaturate(ffxMin(mngR, FfxFloat32(1.0) - mxgR) * rcpMgR); + FfxFloat32 ampgG = ffxSaturate(ffxMin(mngG, FfxFloat32(1.0) - mxgG) * rcpMgG); + FfxFloat32 ampgB = ffxSaturate(ffxMin(mngB, FfxFloat32(1.0) - mxgB) * rcpMgB); + FfxFloat32 ampjR = ffxSaturate(ffxMin(mnjR, FfxFloat32(1.0) - mxjR) * rcpMjR); + FfxFloat32 ampjG = ffxSaturate(ffxMin(mnjG, FfxFloat32(1.0) - mxjG) * rcpMjG); + FfxFloat32 ampjB = ffxSaturate(ffxMin(mnjB, FfxFloat32(1.0) - mxjB) * rcpMjB); + FfxFloat32 ampkR = ffxSaturate(ffxMin(mnkR, FfxFloat32(1.0) - mxkR) * rcpMkR); + FfxFloat32 ampkG = ffxSaturate(ffxMin(mnkG, FfxFloat32(1.0) - mxkG) * rcpMkG); + FfxFloat32 ampkB = ffxSaturate(ffxMin(mnkB, FfxFloat32(1.0) - mxkB) * rcpMkB); +#endif // #if defined(FFX_CAS_BETTER_DIAGONALS) + +#if defined(FFX_CAS_USE_PRECISE_MATH) + // Shaping amount of sharpening. + ampfR = ffxSqrt(ampfR); + ampfG = ffxSqrt(ampfG); + ampfB = ffxSqrt(ampfB); + ampgR = ffxSqrt(ampgR); + ampgG = ffxSqrt(ampgG); + ampgB = ffxSqrt(ampgB); + ampjR = ffxSqrt(ampjR); + ampjG = ffxSqrt(ampjG); + ampjB = ffxSqrt(ampjB); + ampkR = ffxSqrt(ampkR); + ampkG = ffxSqrt(ampkG); + ampkB = ffxSqrt(ampkB); +#else + // Shaping amount of sharpening. + ampfR = ffxApproximateSqrt(ampfR); + ampfG = ffxApproximateSqrt(ampfG); + ampfB = ffxApproximateSqrt(ampfB); + ampgR = ffxApproximateSqrt(ampgR); + ampgG = ffxApproximateSqrt(ampgG); + ampgB = ffxApproximateSqrt(ampgB); + ampjR = ffxApproximateSqrt(ampjR); + ampjG = ffxApproximateSqrt(ampjG); + ampjB = ffxApproximateSqrt(ampjB); + ampkR = ffxApproximateSqrt(ampkR); + ampkG = ffxApproximateSqrt(ampkG); + ampkB = ffxApproximateSqrt(ampkB); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + // Filter shape. 
+ // 0 w 0 + // w 1 w + // 0 w 0 + FfxFloat32 peak = ffxAsFloat(const1.x); + FfxFloat32 wfR = ampfR * peak; + FfxFloat32 wfG = ampfG * peak; + FfxFloat32 wfB = ampfB * peak; + FfxFloat32 wgR = ampgR * peak; + FfxFloat32 wgG = ampgG * peak; + FfxFloat32 wgB = ampgB * peak; + FfxFloat32 wjR = ampjR * peak; + FfxFloat32 wjG = ampjG * peak; + FfxFloat32 wjB = ampjB * peak; + FfxFloat32 wkR = ampkR * peak; + FfxFloat32 wkG = ampkG * peak; + FfxFloat32 wkB = ampkB * peak; + + // Blend between 4 results. + // s t + // u v + FfxFloat32 s = (FfxFloat32(1.0) - pixelPosition.x) * (FfxFloat32(1.0) - pixelPosition.y); + FfxFloat32 t = pixelPosition.x * (FfxFloat32(1.0) - pixelPosition.y); + FfxFloat32 u = (FfxFloat32(1.0) - pixelPosition.x) * pixelPosition.y; + FfxFloat32 v = pixelPosition.x * pixelPosition.y; + + // Thin edges to hide bilinear interpolation (helps diagonals). + FfxFloat32 thinB = 1.0 / 32.0; + +#if defined(FFX_CAS_USE_PRECISE_MATH) + s *= ffxReciprocal(thinB + (mxfG - minimumGreen)); + t *= ffxReciprocal(thinB + (mxgG - mngG)); + u *= ffxReciprocal(thinB + (mxjG - mnjG)); + v *= ffxReciprocal(thinB + (mxkG - mnkG)); +#else + s *= ffxApproximateReciprocal(thinB + (mxfG - minimumGreen)); + t *= ffxApproximateReciprocal(thinB + (mxgG - mngG)); + u *= ffxApproximateReciprocal(thinB + (mxjG - mnjG)); + v *= ffxApproximateReciprocal(thinB + (mxkG - mnkG)); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + // Final weighting. 
+ // b c + // e f g h + // i j k l + // n o + // _____ _____ _____ _____ + // fs gt + // + // _____ _____ _____ _____ + // fs s gt fs t gt + // ju kv + // _____ _____ _____ _____ + // fs gt + // ju u kv ju v kv + // _____ _____ _____ _____ + // + // ju kv + FfxFloat32 qbeR = wfR * s; + FfxFloat32 qbeG = wfG * s; + FfxFloat32 qbeB = wfB * s; + FfxFloat32 qchR = wgR * t; + FfxFloat32 qchG = wgG * t; + FfxFloat32 qchB = wgB * t; + FfxFloat32 qfR = wgR * t + wjR * u + s; + FfxFloat32 qfG = wgG * t + wjG * u + s; + FfxFloat32 qfB = wgB * t + wjB * u + s; + FfxFloat32 qgR = wfR * s + wkR * v + t; + FfxFloat32 qgG = wfG * s + wkG * v + t; + FfxFloat32 qgB = wfB * s + wkB * v + t; + FfxFloat32 qjR = wfR * s + wkR * v + u; + FfxFloat32 qjG = wfG * s + wkG * v + u; + FfxFloat32 qjB = wfB * s + wkB * v + u; + FfxFloat32 qkR = wgR * t + wjR * u + v; + FfxFloat32 qkG = wgG * t + wjG * u + v; + FfxFloat32 qkB = wgB * t + wjB * u + v; + FfxFloat32 qinR = wjR * u; + FfxFloat32 qinG = wjG * u; + FfxFloat32 qinB = wjB * u; + FfxFloat32 qloR = wkR * v; + FfxFloat32 qloG = wkG * v; + FfxFloat32 qloB = wkB * v; + + // Using green coef only, depending on dead code removal to strip out the extra overhead. 
+#if defined(FFX_CAS_USE_PRECISE_MATH) + FfxFloat32 rcpWG = ffxReciprocal(FfxFloat32(2.0) * qbeG + FfxFloat32(2.0) * qchG + FfxFloat32(2.0) * qinG + FfxFloat32(2.0) * qloG + qfG + qgG + qjG + qkG); +#else + FfxFloat32 rcpWG = ffxApproximateReciprocalMedium(FfxFloat32(2.0) * qbeG + FfxFloat32(2.0) * qchG + FfxFloat32(2.0) * qinG + FfxFloat32(2.0) * qloG + qfG + + qgG + qjG + qkG); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + pixR = ffxSaturate((b.r * qbeG + e.r * qbeG + c.r * qchG + h.r * qchG + i.r * qinG + n.r * qinG + l.r * qloG + o.r * qloG + f.r * qfG + g.r * qgG + + j.r * qjG + + k.r * qkG) * + rcpWG); + pixG = ffxSaturate((b.g * qbeG + e.g * qbeG + c.g * qchG + h.g * qchG + i.g * qinG + n.g * qinG + l.g * qloG + o.g * qloG + f.g * qfG + g.g * qgG + + j.g * qjG + + k.g * qkG) * + rcpWG); + pixB = ffxSaturate((b.b * qbeG + e.b * qbeG + c.b * qchG + h.b * qchG + i.b * qinG + n.b * qinG + l.b * qloG + o.b * qloG + f.b * qfG + g.b * qgG + + j.b * qjG + + k.b * qkG) * + rcpWG); +} + +#if FFX_HALF == 1 +// Half precision version algorithm with scaling and filters 2 tiles in one run. +void casFilterWithScalingHalf( + FFX_PARAMETER_OUT FfxFloat16x2 pixR, + FFX_PARAMETER_OUT FfxFloat16x2 pixG, + FFX_PARAMETER_OUT FfxFloat16x2 pixB, + FFX_PARAMETER_IN FfxUInt32x2 ip, // Integer pixel position in output. + FFX_PARAMETER_IN FfxUInt32x4 const0, // Constants generated by ffxCasSetup(). + FFX_PARAMETER_IN FfxUInt32x4 const1) +{ + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(const0.xy) + ffxAsFloat(const0.zw); + + // Tile 0. + // Fractional position is needed in high precision here. 
+ FfxFloat32x2 fp0 = floor(pp); + FfxFloat16x2 ppX; + ppX.x = FfxFloat16(pp.x - fp0.x); + FfxFloat16 ppY = FfxFloat16(pp.y - fp0.y); + FfxInt16x2 sp0 = FfxInt16x2(fp0); + FfxFloat16x3 a0 = casLoadHalf(sp0 + FfxInt16x2(-1, -1)); + FfxFloat16x3 b0 = casLoadHalf(sp0 + FfxInt16x2(0, -1)); + FfxFloat16x3 e0 = casLoadHalf(sp0 + FfxInt16x2(-1, 0)); + FfxFloat16x3 f0 = casLoadHalf(sp0); + FfxFloat16x3 c0 = casLoadHalf(sp0 + FfxInt16x2(1, -1)); + FfxFloat16x3 d0 = casLoadHalf(sp0 + FfxInt16x2(2, -1)); + FfxFloat16x3 g0 = casLoadHalf(sp0 + FfxInt16x2(1, 0)); + FfxFloat16x3 h0 = casLoadHalf(sp0 + FfxInt16x2(2, 0)); + FfxFloat16x3 i0 = casLoadHalf(sp0 + FfxInt16x2(-1, 1)); + FfxFloat16x3 j0 = casLoadHalf(sp0 + FfxInt16x2(0, 1)); + FfxFloat16x3 m0 = casLoadHalf(sp0 + FfxInt16x2(-1, 2)); + FfxFloat16x3 n0 = casLoadHalf(sp0 + FfxInt16x2(0, 2)); + FfxFloat16x3 k0 = casLoadHalf(sp0 + FfxInt16x2(1, 1)); + FfxFloat16x3 l0 = casLoadHalf(sp0 + FfxInt16x2(2, 1)); + FfxFloat16x3 o0 = casLoadHalf(sp0 + FfxInt16x2(1, 2)); + FfxFloat16x3 p0 = casLoadHalf(sp0 + FfxInt16x2(2, 2)); + + // Tile 1 (offset only in x). 
+ FfxFloat32 pp1 = pp.x + ffxAsFloat(const1.z); + FfxFloat32 fp1 = floor(pp1); + ppX.y = FfxFloat16(pp1 - fp1); + FfxInt16x2 sp1 = FfxInt16x2(fp1, sp0.y); + FfxFloat16x3 a1 = casLoadHalf(sp1 + FfxInt16x2(-1, -1)); + FfxFloat16x3 b1 = casLoadHalf(sp1 + FfxInt16x2(0, -1)); + FfxFloat16x3 e1 = casLoadHalf(sp1 + FfxInt16x2(-1, 0)); + FfxFloat16x3 f1 = casLoadHalf(sp1); + FfxFloat16x3 c1 = casLoadHalf(sp1 + FfxInt16x2(1, -1)); + FfxFloat16x3 d1 = casLoadHalf(sp1 + FfxInt16x2(2, -1)); + FfxFloat16x3 g1 = casLoadHalf(sp1 + FfxInt16x2(1, 0)); + FfxFloat16x3 h1 = casLoadHalf(sp1 + FfxInt16x2(2, 0)); + FfxFloat16x3 i1 = casLoadHalf(sp1 + FfxInt16x2(-1, 1)); + FfxFloat16x3 j1 = casLoadHalf(sp1 + FfxInt16x2(0, 1)); + FfxFloat16x3 m1 = casLoadHalf(sp1 + FfxInt16x2(-1, 2)); + FfxFloat16x3 n1 = casLoadHalf(sp1 + FfxInt16x2(0, 2)); + FfxFloat16x3 k1 = casLoadHalf(sp1 + FfxInt16x2(1, 1)); + FfxFloat16x3 l1 = casLoadHalf(sp1 + FfxInt16x2(2, 1)); + FfxFloat16x3 o1 = casLoadHalf(sp1 + FfxInt16x2(1, 2)); + FfxFloat16x3 p1 = casLoadHalf(sp1 + FfxInt16x2(2, 2)); + + // AOS to SOA conversion. 
+ FfxFloat16x2 aR = FfxFloat16x2(a0.r, a1.r); + FfxFloat16x2 aG = FfxFloat16x2(a0.g, a1.g); + FfxFloat16x2 aB = FfxFloat16x2(a0.b, a1.b); + FfxFloat16x2 bR = FfxFloat16x2(b0.r, b1.r); + FfxFloat16x2 bG = FfxFloat16x2(b0.g, b1.g); + FfxFloat16x2 bB = FfxFloat16x2(b0.b, b1.b); + FfxFloat16x2 cR = FfxFloat16x2(c0.r, c1.r); + FfxFloat16x2 cG = FfxFloat16x2(c0.g, c1.g); + FfxFloat16x2 cB = FfxFloat16x2(c0.b, c1.b); + FfxFloat16x2 dR = FfxFloat16x2(d0.r, d1.r); + FfxFloat16x2 dG = FfxFloat16x2(d0.g, d1.g); + FfxFloat16x2 dB = FfxFloat16x2(d0.b, d1.b); + FfxFloat16x2 eR = FfxFloat16x2(e0.r, e1.r); + FfxFloat16x2 eG = FfxFloat16x2(e0.g, e1.g); + FfxFloat16x2 eB = FfxFloat16x2(e0.b, e1.b); + FfxFloat16x2 fR = FfxFloat16x2(f0.r, f1.r); + FfxFloat16x2 fG = FfxFloat16x2(f0.g, f1.g); + FfxFloat16x2 fB = FfxFloat16x2(f0.b, f1.b); + FfxFloat16x2 gR = FfxFloat16x2(g0.r, g1.r); + FfxFloat16x2 gG = FfxFloat16x2(g0.g, g1.g); + FfxFloat16x2 gB = FfxFloat16x2(g0.b, g1.b); + FfxFloat16x2 hR = FfxFloat16x2(h0.r, h1.r); + FfxFloat16x2 hG = FfxFloat16x2(h0.g, h1.g); + FfxFloat16x2 hB = FfxFloat16x2(h0.b, h1.b); + FfxFloat16x2 iR = FfxFloat16x2(i0.r, i1.r); + FfxFloat16x2 iG = FfxFloat16x2(i0.g, i1.g); + FfxFloat16x2 iB = FfxFloat16x2(i0.b, i1.b); + FfxFloat16x2 jR = FfxFloat16x2(j0.r, j1.r); + FfxFloat16x2 jG = FfxFloat16x2(j0.g, j1.g); + FfxFloat16x2 jB = FfxFloat16x2(j0.b, j1.b); + FfxFloat16x2 kR = FfxFloat16x2(k0.r, k1.r); + FfxFloat16x2 kG = FfxFloat16x2(k0.g, k1.g); + FfxFloat16x2 kB = FfxFloat16x2(k0.b, k1.b); + FfxFloat16x2 lR = FfxFloat16x2(l0.r, l1.r); + FfxFloat16x2 lG = FfxFloat16x2(l0.g, l1.g); + FfxFloat16x2 lB = FfxFloat16x2(l0.b, l1.b); + FfxFloat16x2 mR = FfxFloat16x2(m0.r, m1.r); + FfxFloat16x2 mG = FfxFloat16x2(m0.g, m1.g); + FfxFloat16x2 mB = FfxFloat16x2(m0.b, m1.b); + FfxFloat16x2 nR = FfxFloat16x2(n0.r, n1.r); + FfxFloat16x2 nG = FfxFloat16x2(n0.g, n1.g); + FfxFloat16x2 nB = FfxFloat16x2(n0.b, n1.b); + FfxFloat16x2 oR = FfxFloat16x2(o0.r, o1.r); + FfxFloat16x2 oG = 
FfxFloat16x2(o0.g, o1.g); + FfxFloat16x2 oB = FfxFloat16x2(o0.b, o1.b); + FfxFloat16x2 pR = FfxFloat16x2(p0.r, p1.r); + FfxFloat16x2 pG = FfxFloat16x2(p0.g, p1.g); + FfxFloat16x2 pB = FfxFloat16x2(p0.b, p1.b); + + // Run optional input transform. + casInputHalf(aR, aG, aB); + casInputHalf(bR, bG, bB); + casInputHalf(cR, cG, cB); + casInputHalf(dR, dG, dB); + casInputHalf(eR, eG, eB); + casInputHalf(fR, fG, fB); + casInputHalf(gR, gG, gB); + casInputHalf(hR, hG, hB); + casInputHalf(iR, iG, iB); + casInputHalf(jR, jG, jB); + casInputHalf(kR, kG, kB); + casInputHalf(lR, lG, lB); + casInputHalf(mR, mG, mB); + casInputHalf(nR, nG, nB); + casInputHalf(oR, oG, oB); + casInputHalf(pR, pG, pB); + + // Soft min and max. + // These are 2.0x bigger (factored out the extra multiply). + // a b c b + // e f g * 0.5 + e f g * 0.5 [F] + // i j k j + FfxFloat16x2 minimumRed = ffxMin3Half(ffxMin3Half(bR, eR, fR), gR, jR); + FfxFloat16x2 minimumGreen = ffxMin3Half(ffxMin3Half(bG, eG, fG), gG, jG); + FfxFloat16x2 minimumBlue = ffxMin3Half(ffxMin3Half(bB, eB, fB), gB, jB); + +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mnfR2 = ffxMin3Half(ffxMin3Half(minimumRed, aR, cR), iR, kR); + FfxFloat16x2 mnfG2 = ffxMin3Half(ffxMin3Half(minimumGreen, aG, cG), iG, kG); + FfxFloat16x2 mnfB2 = ffxMin3Half(ffxMin3Half(minimumBlue, aB, cB), iB, kB); + minimumRed = minimumRed + mnfR2; + minimumGreen = minimumGreen + mnfG2; + minimumBlue = minimumBlue + mnfB2; +#endif + FfxFloat16x2 mxfR = ffxMax3Half(ffxMax3Half(bR, eR, fR), gR, jR); + FfxFloat16x2 mxfG = ffxMax3Half(ffxMax3Half(bG, eG, fG), gG, jG); + FfxFloat16x2 mxfB = ffxMax3Half(ffxMax3Half(bB, eB, fB), gB, jB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mxfR2 = ffxMax3Half(ffxMax3Half(mxfR, aR, cR), iR, kR); + FfxFloat16x2 mxfG2 = ffxMax3Half(ffxMax3Half(mxfG, aG, cG), iG, kG); + FfxFloat16x2 mxfB2 = ffxMax3Half(ffxMax3Half(mxfB, aB, cB), iB, kB); + mxfR = mxfR + mxfR2; + mxfG = mxfG + mxfG2; + mxfB = mxfB + mxfB2; +#endif + // b c d c + 
// f g h * 0.5 + f g h * 0.5 [G] + // j k l k + FfxFloat16x2 mngR = ffxMin3Half(ffxMin3Half(cR, fR, gR), hR, kR); + FfxFloat16x2 mngG = ffxMin3Half(ffxMin3Half(cG, fG, gG), hG, kG); + FfxFloat16x2 mngB = ffxMin3Half(ffxMin3Half(cB, fB, gB), hB, kB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mngR2 = ffxMin3Half(ffxMin3Half(mngR, bR, dR), jR, lR); + FfxFloat16x2 mngG2 = ffxMin3Half(ffxMin3Half(mngG, bG, dG), jG, lG); + FfxFloat16x2 mngB2 = ffxMin3Half(ffxMin3Half(mngB, bB, dB), jB, lB); + mngR = mngR + mngR2; + mngG = mngG + mngG2; + mngB = mngB + mngB2; +#endif + FfxFloat16x2 mxgR = ffxMax3Half(ffxMax3Half(cR, fR, gR), hR, kR); + FfxFloat16x2 mxgG = ffxMax3Half(ffxMax3Half(cG, fG, gG), hG, kG); + FfxFloat16x2 mxgB = ffxMax3Half(ffxMax3Half(cB, fB, gB), hB, kB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mxgR2 = ffxMax3Half(ffxMax3Half(mxgR, bR, dR), jR, lR); + FfxFloat16x2 mxgG2 = ffxMax3Half(ffxMax3Half(mxgG, bG, dG), jG, lG); + FfxFloat16x2 mxgB2 = ffxMax3Half(ffxMax3Half(mxgB, bB, dB), jB, lB); + mxgR = mxgR + mxgR2; + mxgG = mxgG + mxgG2; + mxgB = mxgB + mxgB2; +#endif + // e f g f + // i j k * 0.5 + i j k * 0.5 [J] + // m n o n + FfxFloat16x2 mnjR = ffxMin3Half(ffxMin3Half(fR, iR, jR), kR, nR); + FfxFloat16x2 mnjG = ffxMin3Half(ffxMin3Half(fG, iG, jG), kG, nG); + FfxFloat16x2 mnjB = ffxMin3Half(ffxMin3Half(fB, iB, jB), kB, nB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mnjR2 = ffxMin3Half(ffxMin3Half(mnjR, eR, gR), mR, oR); + FfxFloat16x2 mnjG2 = ffxMin3Half(ffxMin3Half(mnjG, eG, gG), mG, oG); + FfxFloat16x2 mnjB2 = ffxMin3Half(ffxMin3Half(mnjB, eB, gB), mB, oB); + mnjR = mnjR + mnjR2; + mnjG = mnjG + mnjG2; + mnjB = mnjB + mnjB2; +#endif + FfxFloat16x2 mxjR = ffxMax3Half(ffxMax3Half(fR, iR, jR), kR, nR); + FfxFloat16x2 mxjG = ffxMax3Half(ffxMax3Half(fG, iG, jG), kG, nG); + FfxFloat16x2 mxjB = ffxMax3Half(ffxMax3Half(fB, iB, jB), kB, nB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mxjR2 = ffxMax3Half(ffxMax3Half(mxjR, eR, gR), mR, oR); + 
FfxFloat16x2 mxjG2 = ffxMax3Half(ffxMax3Half(mxjG, eG, gG), mG, oG); + FfxFloat16x2 mxjB2 = ffxMax3Half(ffxMax3Half(mxjB, eB, gB), mB, oB); + mxjR = mxjR + mxjR2; + mxjG = mxjG + mxjG2; + mxjB = mxjB + mxjB2; +#endif + // f g h g + // j k l * 0.5 + j k l * 0.5 [K] + // n o p o + FfxFloat16x2 mnkR = ffxMin3Half(ffxMin3Half(gR, jR, kR), lR, oR); + FfxFloat16x2 mnkG = ffxMin3Half(ffxMin3Half(gG, jG, kG), lG, oG); + FfxFloat16x2 mnkB = ffxMin3Half(ffxMin3Half(gB, jB, kB), lB, oB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mnkR2 = ffxMin3Half(ffxMin3Half(mnkR, fR, hR), nR, pR); + FfxFloat16x2 mnkG2 = ffxMin3Half(ffxMin3Half(mnkG, fG, hG), nG, pG); + FfxFloat16x2 mnkB2 = ffxMin3Half(ffxMin3Half(mnkB, fB, hB), nB, pB); + mnkR = mnkR + mnkR2; + mnkG = mnkG + mnkG2; + mnkB = mnkB + mnkB2; +#endif + FfxFloat16x2 mxkR = ffxMax3Half(ffxMax3Half(gR, jR, kR), lR, oR); + FfxFloat16x2 mxkG = ffxMax3Half(ffxMax3Half(gG, jG, kG), lG, oG); + FfxFloat16x2 mxkB = ffxMax3Half(ffxMax3Half(gB, jB, kB), lB, oB); +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 mxkR2 = ffxMax3Half(ffxMax3Half(mxkR, fR, hR), nR, pR); + FfxFloat16x2 mxkG2 = ffxMax3Half(ffxMax3Half(mxkG, fG, hG), nG, pG); + FfxFloat16x2 mxkB2 = ffxMax3Half(ffxMax3Half(mxkB, fB, hB), nB, pB); + mxkR = mxkR + mxkR2; + mxkG = mxkG + mxkG2; + mxkB = mxkB + mxkB2; +#endif + // Smooth minimum distance to signal limit divided by smooth max. 
+#ifdef FFX_CAS_USE_PRECISE_MATH + FfxFloat16x2 rcpMfR = ffxReciprocalHalf(mxfR); + FfxFloat16x2 rcpMfG = ffxReciprocalHalf(mxfG); + FfxFloat16x2 rcpMfB = ffxReciprocalHalf(mxfB); + FfxFloat16x2 rcpMgR = ffxReciprocalHalf(mxgR); + FfxFloat16x2 rcpMgG = ffxReciprocalHalf(mxgG); + FfxFloat16x2 rcpMgB = ffxReciprocalHalf(mxgB); + FfxFloat16x2 rcpMjR = ffxReciprocalHalf(mxjR); + FfxFloat16x2 rcpMjG = ffxReciprocalHalf(mxjG); + FfxFloat16x2 rcpMjB = ffxReciprocalHalf(mxjB); + FfxFloat16x2 rcpMkR = ffxReciprocalHalf(mxkR); + FfxFloat16x2 rcpMkG = ffxReciprocalHalf(mxkG); + FfxFloat16x2 rcpMkB = ffxReciprocalHalf(mxkB); +#else + FfxFloat16x2 rcpMfR = ffxApproximateReciprocalHalf(mxfR); + FfxFloat16x2 rcpMfG = ffxApproximateReciprocalHalf(mxfG); + FfxFloat16x2 rcpMfB = ffxApproximateReciprocalHalf(mxfB); + FfxFloat16x2 rcpMgR = ffxApproximateReciprocalHalf(mxgR); + FfxFloat16x2 rcpMgG = ffxApproximateReciprocalHalf(mxgG); + FfxFloat16x2 rcpMgB = ffxApproximateReciprocalHalf(mxgB); + FfxFloat16x2 rcpMjR = ffxApproximateReciprocalHalf(mxjR); + FfxFloat16x2 rcpMjG = ffxApproximateReciprocalHalf(mxjG); + FfxFloat16x2 rcpMjB = ffxApproximateReciprocalHalf(mxjB); + FfxFloat16x2 rcpMkR = ffxApproximateReciprocalHalf(mxkR); + FfxFloat16x2 rcpMkG = ffxApproximateReciprocalHalf(mxkG); + FfxFloat16x2 rcpMkB = ffxApproximateReciprocalHalf(mxkB); +#endif +#ifdef FFX_CAS_BETTER_DIAGONALS + FfxFloat16x2 ampfR = ffxSaturate(min(minimumRed, FFX_BROADCAST_FLOAT16X2(2.0) - mxfR) * rcpMfR); + FfxFloat16x2 ampfG = ffxSaturate(min(minimumGreen, FFX_BROADCAST_FLOAT16X2(2.0) - mxfG) * rcpMfG); + FfxFloat16x2 ampfB = ffxSaturate(min(minimumBlue, FFX_BROADCAST_FLOAT16X2(2.0) - mxfB) * rcpMfB); + FfxFloat16x2 ampgR = ffxSaturate(min(mngR, FFX_BROADCAST_FLOAT16X2(2.0) - mxgR) * rcpMgR); + FfxFloat16x2 ampgG = ffxSaturate(min(mngG, FFX_BROADCAST_FLOAT16X2(2.0) - mxgG) * rcpMgG); + FfxFloat16x2 ampgB = ffxSaturate(min(mngB, FFX_BROADCAST_FLOAT16X2(2.0) - mxgB) * rcpMgB); + FfxFloat16x2 ampjR = 
ffxSaturate(min(mnjR, FFX_BROADCAST_FLOAT16X2(2.0) - mxjR) * rcpMjR); + FfxFloat16x2 ampjG = ffxSaturate(min(mnjG, FFX_BROADCAST_FLOAT16X2(2.0) - mxjG) * rcpMjG); + FfxFloat16x2 ampjB = ffxSaturate(min(mnjB, FFX_BROADCAST_FLOAT16X2(2.0) - mxjB) * rcpMjB); + FfxFloat16x2 ampkR = ffxSaturate(min(mnkR, FFX_BROADCAST_FLOAT16X2(2.0) - mxkR) * rcpMkR); + FfxFloat16x2 ampkG = ffxSaturate(min(mnkG, FFX_BROADCAST_FLOAT16X2(2.0) - mxkG) * rcpMkG); + FfxFloat16x2 ampkB = ffxSaturate(min(mnkB, FFX_BROADCAST_FLOAT16X2(2.0) - mxkB) * rcpMkB); +#else + FfxFloat16x2 ampfR = ffxSaturate(min(minimumRed, FFX_BROADCAST_FLOAT16X2(1.0) - mxfR) * rcpMfR); + FfxFloat16x2 ampfG = ffxSaturate(min(minimumGreen, FFX_BROADCAST_FLOAT16X2(1.0) - mxfG) * rcpMfG); + FfxFloat16x2 ampfB = ffxSaturate(min(minimumBlue, FFX_BROADCAST_FLOAT16X2(1.0) - mxfB) * rcpMfB); + FfxFloat16x2 ampgR = ffxSaturate(min(mngR, FFX_BROADCAST_FLOAT16X2(1.0) - mxgR) * rcpMgR); + FfxFloat16x2 ampgG = ffxSaturate(min(mngG, FFX_BROADCAST_FLOAT16X2(1.0) - mxgG) * rcpMgG); + FfxFloat16x2 ampgB = ffxSaturate(min(mngB, FFX_BROADCAST_FLOAT16X2(1.0) - mxgB) * rcpMgB); + FfxFloat16x2 ampjR = ffxSaturate(min(mnjR, FFX_BROADCAST_FLOAT16X2(1.0) - mxjR) * rcpMjR); + FfxFloat16x2 ampjG = ffxSaturate(min(mnjG, FFX_BROADCAST_FLOAT16X2(1.0) - mxjG) * rcpMjG); + FfxFloat16x2 ampjB = ffxSaturate(min(mnjB, FFX_BROADCAST_FLOAT16X2(1.0) - mxjB) * rcpMjB); + FfxFloat16x2 ampkR = ffxSaturate(min(mnkR, FFX_BROADCAST_FLOAT16X2(1.0) - mxkR) * rcpMkR); + FfxFloat16x2 ampkG = ffxSaturate(min(mnkG, FFX_BROADCAST_FLOAT16X2(1.0) - mxkG) * rcpMkG); + FfxFloat16x2 ampkB = ffxSaturate(min(mnkB, FFX_BROADCAST_FLOAT16X2(1.0) - mxkB) * rcpMkB); +#endif + + // Shaping amount of sharpening. 
+#if defined(FFX_CAS_USE_PRECISE_MATH) + ampfR = ffxSqrt(ampfR); + ampfG = ffxSqrt(ampfG); + ampfB = ffxSqrt(ampfB); + ampgR = ffxSqrt(ampgR); + ampgG = ffxSqrt(ampgG); + ampgB = ffxSqrt(ampgB); + ampjR = ffxSqrt(ampjR); + ampjG = ffxSqrt(ampjG); + ampjB = ffxSqrt(ampjB); + ampkR = ffxSqrt(ampkR); + ampkG = ffxSqrt(ampkG); + ampkB = ffxSqrt(ampkB); +#else + ampfR = ffxApproximateSqrtHalf(ampfR); + ampfG = ffxApproximateSqrtHalf(ampfG); + ampfB = ffxApproximateSqrtHalf(ampfB); + ampgR = ffxApproximateSqrtHalf(ampgR); + ampgG = ffxApproximateSqrtHalf(ampgG); + ampgB = ffxApproximateSqrtHalf(ampgB); + ampjR = ffxApproximateSqrtHalf(ampjR); + ampjG = ffxApproximateSqrtHalf(ampjG); + ampjB = ffxApproximateSqrtHalf(ampjB); + ampkR = ffxApproximateSqrtHalf(ampkR); + ampkG = ffxApproximateSqrtHalf(ampkG); + ampkB = ffxApproximateSqrtHalf(ampkB); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + // Filter shape. + FfxFloat16 peak = FFX_UINT32_TO_FLOAT16X2(const1.y).x; + FfxFloat16x2 wfR = ampfR * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wfG = ampfG * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wfB = ampfB * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wgR = ampgR * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wgG = ampgG * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wgB = ampgB * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wjR = ampjR * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wjG = ampjG * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wjB = ampjB * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wkR = ampkR * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wkG = ampkG * FFX_BROADCAST_FLOAT16X2(peak); + FfxFloat16x2 wkB = ampkB * FFX_BROADCAST_FLOAT16X2(peak); + + // Blend between 4 results. 
+ FfxFloat16x2 s = (FFX_BROADCAST_FLOAT16X2(1.0) - ppX) * (FFX_BROADCAST_FLOAT16X2(1.0) - FFX_BROADCAST_FLOAT16X2(ppY)); + FfxFloat16x2 t = ppX * (FFX_BROADCAST_FLOAT16X2(1.0) - FFX_BROADCAST_FLOAT16X2(ppY)); + FfxFloat16x2 u = (FFX_BROADCAST_FLOAT16X2(1.0) - ppX) * FFX_BROADCAST_FLOAT16X2(ppY); + FfxFloat16x2 v = ppX * FFX_BROADCAST_FLOAT16X2(ppY); + + // Thin edges to hide bilinear interpolation (helps diagonals). + FfxFloat16x2 thinB = FFX_BROADCAST_FLOAT16X2(1.0 / 32.0); + +#if defined(FFX_CAS_USE_PRECISE_MATH) + s *= ffxReciprocalHalf(thinB + (mxfG - minimumGreen)); + t *= ffxReciprocalHalf(thinB + (mxgG - mngG)); + u *= ffxReciprocalHalf(thinB + (mxjG - mnjG)); + v *= ffxReciprocalHalf(thinB + (mxkG - mnkG)); +#else + s *= ffxApproximateReciprocalHalf(thinB + (mxfG - minimumGreen)); + t *= ffxApproximateReciprocalHalf(thinB + (mxgG - mngG)); + u *= ffxApproximateReciprocalHalf(thinB + (mxjG - mnjG)); + v *= ffxApproximateReciprocalHalf(thinB + (mxkG - mnkG)); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + // Final weighting. + FfxFloat16x2 qbeR = wfR * s; + FfxFloat16x2 qbeG = wfG * s; + FfxFloat16x2 qbeB = wfB * s; + FfxFloat16x2 qchR = wgR * t; + FfxFloat16x2 qchG = wgG * t; + FfxFloat16x2 qchB = wgB * t; + FfxFloat16x2 qfR = wgR * t + wjR * u + s; + FfxFloat16x2 qfG = wgG * t + wjG * u + s; + FfxFloat16x2 qfB = wgB * t + wjB * u + s; + FfxFloat16x2 qgR = wfR * s + wkR * v + t; + FfxFloat16x2 qgG = wfG * s + wkG * v + t; + FfxFloat16x2 qgB = wfB * s + wkB * v + t; + FfxFloat16x2 qjR = wfR * s + wkR * v + u; + FfxFloat16x2 qjG = wfG * s + wkG * v + u; + FfxFloat16x2 qjB = wfB * s + wkB * v + u; + FfxFloat16x2 qkR = wgR * t + wjR * u + v; + FfxFloat16x2 qkG = wgG * t + wjG * u + v; + FfxFloat16x2 qkB = wgB * t + wjB * u + v; + FfxFloat16x2 qinR = wjR * u; + FfxFloat16x2 qinG = wjG * u; + FfxFloat16x2 qinB = wjB * u; + FfxFloat16x2 qloR = wkR * v; + FfxFloat16x2 qloG = wkG * v; + FfxFloat16x2 qloB = wkB * v; + + // Filter. 
+#if defined(FFX_CAS_USE_PRECISE_MATH) + FfxFloat16x2 rcpWG = ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(2.0) * qbeG + FFX_BROADCAST_FLOAT16X2(2.0) * qchG + FFX_BROADCAST_FLOAT16X2(2.0) * qinG + + FFX_BROADCAST_FLOAT16X2(2.0) * qloG + qfG + qgG + qjG + qkG); +#else + FfxFloat16x2 rcpWG = ffxApproximateReciprocalMediumHalf( + FFX_BROADCAST_FLOAT16X2(2.0) * qbeG + FFX_BROADCAST_FLOAT16X2(2.0) * qchG + FFX_BROADCAST_FLOAT16X2(2.0) * qinG + + FFX_BROADCAST_FLOAT16X2(2.0) * qloG + qfG + qgG + qjG + qkG); +#endif // #if defined(FFX_CAS_USE_PRECISE_MATH) + + pixR = ffxSaturate( + (bR * qbeG + eR * qbeG + cR * qchG + hR * qchG + iR * qinG + nR * qinG + lR * qloG + oR * qloG + fR * qfG + gR * qgG + jR * qjG + kR * qkG) * rcpWG); + pixG = ffxSaturate( + (bG * qbeG + eG * qbeG + cG * qchG + hG * qchG + iG * qinG + nG * qinG + lG * qloG + oG * qloG + fG * qfG + gG * qgG + jG * qjG + kG * qkG) * rcpWG); + pixB = ffxSaturate( + (bB * qbeG + eB * qbeG + cB * qchG + hB * qchG + iB * qinG + nB * qinG + lB * qloG + oB * qloG + fB * qfG + gB * qgG + jB * qjG + kB * qkG) * rcpWG); +} +#endif // #if FFX_HALF == 1 + +/// Apply constant adaptive sharpening (CAS) filter to a single pixel. +/// +/// @param [out] pixR Red channel output value. This is non-vector to enable switching between ffxCasFilter and ffxCasFilterHalf. +/// @param [out] pixG Green channel output value. This is non-vector to enable switching between ffxCasFilter and ffxCasFilterHalf. +/// @param [out] pixB Blue channel output value. This is non-vector to enable switching between ffxCasFilter and ffxCasFilterHalf. +/// @param [in] samplePosition The integer pixel position in the output. +/// @param [in] const0 The first constant generated by ffxCasSetup. +/// @param [in] const1 The second constant generated by ffxCasSetup. +/// @param [in] noScaling Must be a compile-time literal value. A value of true applies sharpening only (no resizing). 
+///
+/// @ingroup FfxGPUCas
+void ffxCasFilter(
+    FFX_PARAMETER_OUT FfxFloat32 pixR,
+    FFX_PARAMETER_OUT FfxFloat32 pixG,
+    FFX_PARAMETER_OUT FfxFloat32 pixB,
+    FFX_PARAMETER_IN FfxUInt32x2 samplePosition,
+    FFX_PARAMETER_IN FfxUInt32x4 const0,
+    FFX_PARAMETER_IN FfxUInt32x4 const1,
+    FFX_PARAMETER_IN FfxBoolean noScaling)
+{
+#if defined(FFX_CAS_DEBUG_CHECKER)
+    // Debug aid: bit 8 of (x XOR y) divides the output into a checkerboard of 256x256 pixel tiles.
+    // Tiles where that bit is clear bypass the filter so filtered and unfiltered areas can be compared visually.
+    FfxUInt32 checkerBit = ((samplePosition.x ^ samplePosition.y) >> 8u) & 1u;
+    if (checkerBit == 0u) {
+        FfxFloat32x3 unfiltered = casLoad(FfxInt32x2(samplePosition));
+        pixR = unfiltered.r;
+        pixG = unfiltered.g;
+        pixB = unfiltered.b;
+        casInput(pixR, pixG, pixB);
+        return;
+    }
+#endif // #if defined(FFX_CAS_DEBUG_CHECKER)
+
+    // noScaling is expected to be a compile-time literal, so only one of the two paths remains after compilation.
+    if (!noScaling) {
+        casFilterWithScaling(pixR, pixG, pixB, samplePosition, const0, const1);
+        return;
+    }
+    casFilterNoScaling(pixR, pixG, pixB, samplePosition, const0, const1);
+}
+
+#if FFX_HALF == 1
+#if defined(FFX_HLSL)
+#if !defined(FFX_CAS_USE_PRECISE_MATH)
+// There is no way to do packed reinterpretation here, so the approximation optimizations must be disabled.
+// NOTE(review): casFilterWithScalingHalf above has already tested this macro by the time it is defined here - confirm the define sits early enough to take effect.
+#define FFX_CAS_USE_PRECISE_MATH (1)
+#endif // #if !defined(FFX_CAS_USE_PRECISE_MATH)
+#endif // #if defined(FFX_HLSL)
+
+/// A utility function which converts the packed SOA form results
+/// returned by ffxCasFilterHalf into AOS form data ready for storing.
+///
+/// The implementation of both ffxCasDepackHalf and ffxCasFilterHalf assumes
+/// that the pixels packed together are separated by 8 pixels in the X dimension.
+///
+/// It is suggested to call ffxCasDepackHalf only right before stores, so that any work
+/// done after ffxCasFilterHalf can stay in packed math.
+///
+/// An example might look as follows:
+///     ffxCasFilterHalf(cR, cG, cB, gxy, const0, const1, false);
+///     ...
+///     ffxCasDepackHalf(c0, c1, cR, cG, cB);
+///     imageStore(imgDst, FfxInt32x2(gxy), FfxFloat4(c0));
+///     imageStore(imgDst, FfxInt32x2(gxy) + FfxInt32x2(8, 0), FfxFloat4(c1));
+///
+/// @param [out] pix0 Receives the RGB of the first packed pixel (the .x components). Alpha is written (as 0) on HLSL only.
+/// @param [out] pix1 Receives the RGB of the second packed pixel (the .y components). Alpha is written (as 0) on HLSL only.
+/// @param [in] pixR The red channel components of two packed pixels.
+/// @param [in] pixG The green channel components of two packed pixels.
+/// @param [in] pixB The blue channel components of two packed pixels.
+///
+/// @ingroup FfxGPUCas
+void ffxCasDepackHalf(
+    FFX_PARAMETER_OUT FfxFloat16x4 pix0,
+    FFX_PARAMETER_OUT FfxFloat16x4 pix1,
+    FFX_PARAMETER_IN FfxFloat16x2 pixR,
+    FFX_PARAMETER_IN FfxFloat16x2 pixG,
+    FFX_PARAMETER_IN FfxFloat16x2 pixB)
+{
+    // Gather the .x components into the first pixel and the .y components into the second.
+    pix0.rgb = FfxFloat16x3(pixR.x, pixG.x, pixB.x);
+    pix1.rgb = FfxFloat16x3(pixR.y, pixG.y, pixB.y);
+#if defined(FFX_HLSL)
+    // Slower path for DX only: it does not allow output values to be left uninitialized, so alpha is written too.
+    pix0.a = 0.0;
+    pix1.a = 0.0;
+#endif
+}
+
+/// Apply constant adaptive sharpening (CAS) filter to a pair of pixels.
+///
+/// The two output pixels belong to 2 separate 8x8 tiles in a 16x8 region:
+///     pix<R,G,B>.x = left 8x8 tile (the pixel at samplePosition)
+///     pix<R,G,B>.y = right 8x8 tile (the pixel 8 further along X)
+/// Keeping the results packed this way enables later processing to easily be packed as well.
+///
+/// @param [out] pixR Red channel output values of both pixels, packed as described above.
+/// @param [out] pixG Green channel output values of both pixels, packed as described above.
+/// @param [out] pixB Blue channel output values of both pixels, packed as described above.
+/// @param [in] samplePosition The integer pixel position in the output.
+/// @param [in] const0 The first constant generated by ffxCasSetup.
+/// @param [in] const1 The second constant generated by ffxCasSetup.
+/// @param [in] noScaling Must be a compile-time literal value. A value of true applies sharpening only (no resizing).
+///
+/// @ingroup FfxGPUCas
+void ffxCasFilterHalf(
+    FFX_PARAMETER_OUT FfxFloat16x2 pixR,
+    FFX_PARAMETER_OUT FfxFloat16x2 pixG,
+    FFX_PARAMETER_OUT FfxFloat16x2 pixB,
+    FFX_PARAMETER_IN FfxUInt32x2 samplePosition,
+    FFX_PARAMETER_IN FfxUInt32x4 const0,
+    FFX_PARAMETER_IN FfxUInt32x4 const1,
+    FFX_PARAMETER_IN FfxBoolean noScaling)
+{
+#if defined(FFX_CAS_DEBUG_CHECKER)
+    // Debug a checker pattern of on/off tiles for visual inspection.
+    if ((((samplePosition.x ^ samplePosition.y) >> 8u) & 1u) == 0u) {
+        // "Off" tile: pass both packed pixels through unfiltered. The second pixel sits 8 pixels to the
+        // right of samplePosition, the same separation ffxCasDepackHalf assumes when unpacking.
+        FfxFloat16x3 pix0 = casLoadHalf(FfxInt16x2(samplePosition));
+        FfxFloat16x3 pix1 = casLoadHalf(FfxInt16x2(samplePosition) + FfxInt16x2(8, 0));
+        pixR = FfxFloat16x2(pix0.r, pix1.r);
+        pixG = FfxFloat16x2(pix0.g, pix1.g);
+        pixB = FfxFloat16x2(pix0.b, pix1.b);
+        casInputHalf(pixR, pixG, pixB);
+        return;
+    }
+#endif // #if defined(FFX_CAS_DEBUG_CHECKER)
+
+    // No scaling algorithm uses minimal 3x3 pixel neighborhood.
+    // noScaling is expected to be a compile-time literal, so only one branch remains after compilation.
+    if (noScaling) {
+        casFilterNoScalingHalf(pixR, pixG, pixB, samplePosition, const0, const1);
+    } else {
+        casFilterWithScalingHalf(pixR, pixG, pixB, samplePosition, const0, const1);
+    }
+}
+#endif // #if FFX_HALF == 1
+#endif // #if defined(FFX_GPU)
diff --git a/Shaders/shaders/cas/ffx_cas.h.meta b/Shaders/shaders/cas/ffx_cas.h.meta new file mode 100644 index 0000000..928f82d --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas.h.meta @@ -0,0 +1,65 @@ +fileFormatVersion: 2 +guid: f674a479a9610d244a0b9f93b091b49d +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: +
enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h b/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h new file mode 100644 index 0000000..9d89e61 --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h @@ -0,0 +1,226 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "ffx_cas_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "../ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+// Default FFX_PREFER_WAVE64 to nothing so code that places it in front of an entry point still compiles.
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
+
+#if defined(FFX_GPU)
+#pragma warning(disable: 3205) // conversion from larger type to smaller
+#endif // #if defined(FFX_GPU)
+// Register helpers: two macro levels so a CAS_BIND_* slot macro is expanded to its number before being pasted onto t/u/b.
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex) b##regIndex
+#define FFX_CAS_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex))
+#define FFX_CAS_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_CAS_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex))
+// CAS constants; the buffer is only declared when the including pass defines a constant buffer slot.
+#if defined(CAS_BIND_CB_CAS)
+    cbuffer cbCAS : FFX_CAS_DECLARE_CB(CAS_BIND_CB_CAS)
+    {
+        FfxUInt32x4 const0;
+        FfxUInt32x4 const1;
+    #define FFX_CAS_CONSTANT_BUFFER_1_SIZE 8  // Number of 32-bit values. This must be kept in sync with the cbCAS size.
+    };
+#else // NOTE(review): these macros also rewrite any later identifier named const0/const1 (e.g. function parameters) - confirm this configuration is ever compiled
+    #define const0 0
+    #define const1 0
+#endif
+
+#if defined(FFX_GPU)
+#define FFX_CAS_ROOTSIG_STRINGIFY(p) FFX_CAS_ROOTSIG_STR(p)
+#define FFX_CAS_ROOTSIG_STR(p) #p
+#define FFX_CAS_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_CAS_ROOTSIG_STRINGIFY(FFX_CAS_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_CAS_ROOTSIG_STRINGIFY(FFX_CAS_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "CBV(b0), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                    "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                    "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                    "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                    "comparisonFunc = COMPARISON_NEVER, " \
+                                    "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+// The root signature attribute is only emitted when FFX_CAS_EMBED_ROOTSIG is defined; otherwise the macro expands to nothing.
+#if defined(FFX_CAS_EMBED_ROOTSIG)
+#define FFX_CAS_EMBED_ROOTSIG_CONTENT FFX_CAS_ROOTSIG
+#else
+#define FFX_CAS_EMBED_ROOTSIG_CONTENT
+#endif // #if FFX_CAS_EMBED_ROOTSIG
+#endif // #if defined(FFX_GPU)
+
+// Accessors for the two CAS constant vectors; both return zero when no constant buffer is bound.
+FfxUInt32x4 Const0()
+{
+#if defined(CAS_BIND_CB_CAS)
+    return const0;
+#else
+    return 0.f;
+#endif
+}
+
+FfxUInt32x4 Const1()
+{
+#if defined(CAS_BIND_CB_CAS)
+    return const1;
+#else
+    return 0.f;
+#endif
+}
+
+SamplerState s_LinearClamp : register(s0);
+
+    // SRVs (element type spelled out to match the UAV below)
+    #if defined(CAS_BIND_SRV_INPUT_COLOR)
+        Texture2D<FfxFloat32x4> r_input_color : FFX_CAS_DECLARE_SRV(CAS_BIND_SRV_INPUT_COLOR);
+    #endif
+
+    // UAV declarations (a typed UAV needs an explicit element type; casStoreOutput writes FfxFloat32x4)
+    #if defined(CAS_BIND_UAV_OUTPUT_COLOR)
+        RWTexture2D<FfxFloat32x4> rw_output_color : FFX_CAS_DECLARE_UAV(CAS_BIND_UAV_OUTPUT_COLOR);
+    #endif
+
+#if FFX_HALF
+// Load the RGB channels of the input texel at `position` (mip 0) as 16-bit floats; returns 0 when no input SRV is bound.
+FfxFloat16x3 casLoadHalf(FFX_PARAMETER_IN FfxInt16x2 position)
+{
+#if defined(CAS_BIND_SRV_INPUT_COLOR)
+    return FfxFloat16x3(r_input_color.Load(FfxInt32x3(position, 0)).rgb);
+#else
+    return 0.f;
+#endif
+}
+
+// Transform input from the load into a linear color space between 0 and 1.
+void casInputHalf(FFX_PARAMETER_INOUT FfxFloat16x2 red, FFX_PARAMETER_INOUT FfxFloat16x2 green, FFX_PARAMETER_INOUT FfxFloat16x2 blue)
+{
+#if FFX_CAS_COLOR_SPACE_CONVERSION == 1 // gamma 2.0
+    red *= red;
+    green *= green;
+    blue *= blue;
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 2 // gamma 2.2
+    red = ffxLinearFromGammaHalf(red, FfxFloat16(2.2f));
+    green = ffxLinearFromGammaHalf(green, FfxFloat16(2.2f));
+    blue = ffxLinearFromGammaHalf(blue, FfxFloat16(2.2f));
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 3 // sRGB output (auto-degamma'd on sampler read)
+    // Nothing to do on input in this mode: the loaded values are used as-is.
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 4 // sRGB input/output
+    red = ffxLinearFromSrgbHalf(red);
+    green = ffxLinearFromSrgbHalf(green);
+    blue = ffxLinearFromSrgbHalf(blue);
+#endif
+}
+// Encode the packed (two-pixel) color channels for output according to FFX_CAS_COLOR_SPACE_CONVERSION; any other value leaves them unchanged.
+void casOutputHalf(FFX_PARAMETER_INOUT FfxFloat16x2 red, FFX_PARAMETER_INOUT FfxFloat16x2 green, FFX_PARAMETER_INOUT FfxFloat16x2 blue)
+{
+#if FFX_CAS_COLOR_SPACE_CONVERSION == 1 // gamma 2.0
+    red = ffxSqrt(red);
+    green = ffxSqrt(green);
+    blue = ffxSqrt(blue);
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 2 // gamma 2.2
+    red = ffxGammaFromLinearHalf(red, FfxFloat16(1/2.2f));
+    green = ffxGammaFromLinearHalf(green, FfxFloat16(1/2.2f));
+    blue = ffxGammaFromLinearHalf(blue, FfxFloat16(1/2.2f));
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 3 // sRGB output (auto-degamma'd on sampler read)
+    red = ffxSrgbFromLinearHalf(red);
+    green = ffxSrgbFromLinearHalf(green);
+    blue = ffxSrgbFromLinearHalf(blue);
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 4 // sRGB input/output
+    red = ffxSrgbFromLinearHalf(red);
+    green = ffxSrgbFromLinearHalf(green);
+    blue = ffxSrgbFromLinearHalf(blue);
+#endif
+}
+
+#else // !FFX_HALF: 32-bit float variants of the callbacks
+// Load the RGB channels of the input texel at `position` (mip 0); returns 0 when no input SRV is bound.
+FfxFloat32x3 casLoad(FFX_PARAMETER_IN FfxInt32x2 position)
+{
+#if defined(CAS_BIND_SRV_INPUT_COLOR)
+    return r_input_color.Load(FfxInt32x3(position, 0)).rgb;
+#else
+    return 0.f;
+#endif
+}
+
+// Transform input from the load into a linear color space between 0 and 1.
+void casInput(FFX_PARAMETER_INOUT FfxFloat32 red, FFX_PARAMETER_INOUT FfxFloat32 green, FFX_PARAMETER_INOUT FfxFloat32 blue)
+{
+#if FFX_CAS_COLOR_SPACE_CONVERSION == 1 // gamma 2.0
+    red *= red;
+    green *= green;
+    blue *= blue;
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 2 // gamma 2.2
+    red = ffxLinearFromGamma(red, FfxFloat32(2.2f));
+    green = ffxLinearFromGamma(green, FfxFloat32(2.2f));
+    blue = ffxLinearFromGamma(blue, FfxFloat32(2.2f));
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 3 // sRGB output (auto-degamma'd on sampler read)
+    // Nothing to do on input in this mode: the loaded values are used as-is.
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 4 // sRGB input/output
+    red = ffxLinearFromSrgb(red);
+    green = ffxLinearFromSrgb(green);
+    blue = ffxLinearFromSrgb(blue);
+#endif
+}
+// Encode one pixel's color channels for output according to FFX_CAS_COLOR_SPACE_CONVERSION; any other value leaves them unchanged.
+void casOutput(FFX_PARAMETER_INOUT FfxFloat32 red, FFX_PARAMETER_INOUT FfxFloat32 green, FFX_PARAMETER_INOUT FfxFloat32 blue)
+{
+#if FFX_CAS_COLOR_SPACE_CONVERSION == 1 // gamma 2.0
+    red = ffxSqrt(red);
+    green = ffxSqrt(green);
+    blue = ffxSqrt(blue);
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 2 // gamma 2.2
+    red = ffxGammaFromLinear(red, FfxFloat32(1/2.2f));
+    green = ffxGammaFromLinear(green, FfxFloat32(1/2.2f));
+    blue = ffxGammaFromLinear(blue, FfxFloat32(1/2.2f));
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 3 // sRGB output (auto-degamma'd on sampler read)
+    red = ffxSrgbFromLinear(red);
+    green = ffxSrgbFromLinear(green);
+    blue = ffxSrgbFromLinear(blue);
+#elif FFX_CAS_COLOR_SPACE_CONVERSION == 4 // sRGB input/output
+    red = ffxSrgbFromLinear(red);
+    green = ffxSrgbFromLinear(green);
+    blue = ffxSrgbFromLinear(blue);
+#endif
+}
+
+#endif // FFX_HALF
+// Write fColor to the output texture at pixel iPxPos; compiles to a no-op when no output UAV is bound.
+void casStoreOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor)
+{
+#if defined(CAS_BIND_UAV_OUTPUT_COLOR)
+    rw_output_color[iPxPos] = fColor;
+#endif
+}
+
+#endif // #if defined(FFX_GPU)
diff --git a/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h.meta b/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h.meta new file mode 100644 index 0000000..59fd86f --- /dev/null +++
b/Shaders/shaders/cas/ffx_cas_callbacks_hlsl.h.meta @@ -0,0 +1,65 @@ +fileFormatVersion: 2 +guid: 75c2da7c7b951b940adb44e9342dd303 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/cas/ffx_cas_resources.h b/Shaders/shaders/cas/ffx_cas_resources.h new file mode 100644 index 0000000..2ea1adb --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas_resources.h @@ -0,0 +1,41 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_CAS_RESOURCES_H
+#define FFX_CAS_RESOURCES_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+// Resource identifiers; 0 is reserved as the null resource.
+#define FFX_CAS_RESOURCE_IDENTIFIER_NULL                                      0
+#define FFX_CAS_RESOURCE_IDENTIFIER_INPUT_COLOR                               1
+#define FFX_CAS_RESOURCE_IDENTIFIER_OUTPUT_COLOR                              2
+// Number of resource identifiers above; also stringified into numDescriptors of the CAS root signature.
+#define FFX_CAS_RESOURCE_IDENTIFIER_COUNT                                     3
+
+// CBV resource definitions
+#define FFX_CAS_CONSTANTBUFFER_IDENTIFIER_CAS                                 0
+// Number of constant buffer identifiers above.
+#define FFX_CAS_CONSTANTBUFFER_IDENTIFIER_COUNT                               1
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif // FFX_CAS_RESOURCES_H
diff --git a/Shaders/shaders/cas/ffx_cas_resources.h.meta b/Shaders/shaders/cas/ffx_cas_resources.h.meta new file mode 100644 index 0000000..65da05c --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas_resources.h.meta @@ -0,0 +1,65 @@ +fileFormatVersion: 2 +guid: 178b95414522b1349920c12ff1ddc925 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 + Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/cas/ffx_cas_sharpen.h b/Shaders/shaders/cas/ffx_cas_sharpen.h new file mode 100644 index 0000000..3e42f98 --- /dev/null +++
b/Shaders/shaders/cas/ffx_cas_sharpen.h @@ -0,0 +1,89 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE.
+
+#include "../ffx_core.h"
+
+#if FFX_HALF
+// The half-precision build only needs the packed (two pixels per call) CAS code path.
+#define FFX_CAS_PACKED_ONLY 1
+
+#endif // FFX_HALF
+
+#include "ffx_cas.h"
+// Runs CAS for the pixels owned by one thread: four outputs spaced 8 texels apart in x and y inside the group's tile. Dtid is unused.
+void Sharpen(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
+{
+    // Remap the linear thread index to an xy position (PS-like swizzle pattern) and add the group's tile origin (16 pixels per group in x and y).
+    FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
+    // Compile-time switch, forwarded as the last argument of every filter call below.
+    FfxBoolean sharpenOnly;
+#if FFX_CAS_OPTION_SHARPEN_ONLY
+    sharpenOnly = true;
+#else
+    sharpenOnly = false;
+#endif  // FFX_CAS_OPTION_SHARPEN_ONLY
+
+#if FFX_HALF
+
+    // Filter. Each packed call yields two pixels, unpacked into c0 (at gxy) and c1 (8 texels to the right).
+    FfxFloat16x4 c0, c1;
+    FfxFloat16x2 cR, cG, cB;
+
+    ffxCasFilterHalf(cR, cG, cB, gxy, Const0(), Const1(), sharpenOnly);
+    casOutputHalf(cR, cG, cB);
+    ffxCasDepackHalf(c0, c1, cR, cG, cB);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c0));
+    casStoreOutput(FfxInt32x2(gxy) + FfxInt32x2(8, 0), FfxFloat32x4(c1));
+    gxy.y += 8u;
+    // Second packed pair, 8 rows further down.
+    ffxCasFilterHalf(cR, cG, cB, gxy, Const0(), Const1(), sharpenOnly);
+    casOutputHalf(cR, cG, cB);
+    ffxCasDepackHalf(c0, c1, cR, cG, cB);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c0));
+    casStoreOutput(FfxInt32x2(gxy) + FfxInt32x2(8, 0), FfxFloat32x4(c1));
+
+#else
+
+    // Filter. One pixel per call, visiting offsets (0,0), (8,0), (8,8), (0,8) from the start position; alpha is written as 1.
+    FfxFloat32x3 c;
+
+    ffxCasFilter(c.r, c.g, c.b, gxy, Const0(), Const1(), sharpenOnly);
+    casOutput(c.r, c.g, c.b);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c, 1));
+    gxy.x += 8u;
+
+    ffxCasFilter(c.r, c.g, c.b, gxy, Const0(), Const1(), sharpenOnly);
+    casOutput(c.r, c.g, c.b);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c, 1));
+    gxy.y += 8u;
+
+    ffxCasFilter(c.r, c.g, c.b, gxy, Const0(), Const1(), sharpenOnly);
+    casOutput(c.r, c.g, c.b);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c, 1));
+    gxy.x -= 8u;
+
+    ffxCasFilter(c.r, c.g, c.b, gxy, Const0(), Const1(), sharpenOnly);
+    casOutput(c.r, c.g, c.b);
+    casStoreOutput(FfxInt32x2(gxy), FfxFloat32x4(c, 1));
+
+#endif  // FFX_HALF
+}
diff --git a/Shaders/shaders/cas/ffx_cas_sharpen.h.meta b/Shaders/shaders/cas/ffx_cas_sharpen.h.meta new file mode 100644 index 0000000..26e5d42 --- /dev/null +++ b/Shaders/shaders/cas/ffx_cas_sharpen.h.meta @@ -0,0 +1,65 @@ +fileFormatVersion: 2 +guid: 4a24b15e191a20745a2da66e8ff76069 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + : Any + second: + enabled: 0 + settings: + Exclude Editor: 1 + Exclude GameCoreScarlett: 1 + Exclude GameCoreXboxOne: 1 + Exclude Linux64: 1 +
Exclude OSXUniversal: 1 + Exclude PS4: 1 + Exclude PS5: 1 + Exclude Win: 1 + Exclude Win64: 1 + - first: + Any: + second: + enabled: 0 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + - first: + Standalone: Linux64 + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: OSXUniversal + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win + second: + enabled: 0 + settings: + CPU: None + - first: + Standalone: Win64 + second: + enabled: 0 + settings: + CPU: None + userData: + assetBundleName: + assetBundleVariant: diff --git a/Shaders/shaders/ffx_cas_sharpen_pass.hlsl b/Shaders/shaders/ffx_cas_sharpen_pass.hlsl new file mode 100644 index 0000000..0b01c2c --- /dev/null +++ b/Shaders/shaders/ffx_cas_sharpen_pass.hlsl @@ -0,0 +1,54 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE.
+
+// CAS pass
+// SRV  0 : CAS_InputColor          : r_input_color
+// UAV  0 : CAS_OutputColor         : rw_output_color
+// CB   0 : cbCAS
+
+#define CAS_BIND_SRV_INPUT_COLOR              0
+#define CAS_BIND_UAV_OUTPUT_COLOR             0
+#define CAS_BIND_CB_CAS                       0
+
+#include "cas/ffx_cas_callbacks_hlsl.h"
+#include "cas/ffx_cas_sharpen.h"
+
+#ifndef FFX_CAS_THREAD_GROUP_WIDTH
+#define FFX_CAS_THREAD_GROUP_WIDTH 64
+#endif // #ifndef FFX_CAS_THREAD_GROUP_WIDTH
+#ifndef FFX_CAS_THREAD_GROUP_HEIGHT
+#define FFX_CAS_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFX_CAS_THREAD_GROUP_HEIGHT
+#ifndef FFX_CAS_THREAD_GROUP_DEPTH
+#define FFX_CAS_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_CAS_THREAD_GROUP_DEPTH
+#ifndef FFX_CAS_NUM_THREADS
+#define FFX_CAS_NUM_THREADS [numthreads(FFX_CAS_THREAD_GROUP_WIDTH, FFX_CAS_THREAD_GROUP_HEIGHT, FFX_CAS_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_CAS_NUM_THREADS
+// Kernel entry point (the name matches `#pragma kernel CS` in ffx_cas_sharpen_pass.compute); forwards the thread and group IDs to Sharpen().
+FFX_PREFER_WAVE64
+FFX_CAS_NUM_THREADS
+FFX_CAS_EMBED_ROOTSIG_CONTENT
+void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID)
+{
+    Sharpen(LocalThreadId, WorkGroupId, Dtid);
+}
diff --git a/Shaders/shaders/ffx_cas_sharpen_pass.hlsl.meta b/Shaders/shaders/ffx_cas_sharpen_pass.hlsl.meta new file mode 100644 index 0000000..29dced9 --- /dev/null +++ b/Shaders/shaders/ffx_cas_sharpen_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: b9b8c665d9f11a44e9ca915bb9ce0225 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: