From 955ce3f1903809c80adef3364a873e223ea95bcb Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 6 Jun 2023 16:05:27 +0200 Subject: [PATCH] - Consolidated accumulate and accumulate+sharpen pipelines into a single pipeline, with the sharpening option being changed into a local keyword that can be enabled or disabled on-the-fly. This removes the need for an additional copy of the accumulate shader, saving a considerable amount of memory. - Changed the atomic counter from a temporary RT that gets reset every frame, to a permanent resource that gets set to 0 only on the first execution frame. Fixes auto-exposure causing a black screen on MacOS. --- Assets/Scripts/Core/Fsr2Context.cs | 18 ++++------- Assets/Scripts/Core/Fsr2Pipeline.cs | 47 ++++++---------------------- Assets/Scripts/Core/Fsr2Resources.cs | 6 ++++ 3 files changed, 21 insertions(+), 50 deletions(-) diff --git a/Assets/Scripts/Core/Fsr2Context.cs b/Assets/Scripts/Core/Fsr2Context.cs index 8a6ea50..a1d4c25 100644 --- a/Assets/Scripts/Core/Fsr2Context.cs +++ b/Assets/Scripts/Core/Fsr2Context.cs @@ -43,7 +43,6 @@ namespace FidelityFX private Fsr2Pipeline _reconstructPreviousDepthPipeline; private Fsr2Pipeline _lockPipeline; private Fsr2Pipeline _accumulatePipeline; - private Fsr2Pipeline _accumulateSharpenPipeline; private Fsr2Pipeline _rcasPipeline; private Fsr2Pipeline _computeLuminancePyramidPipeline; private Fsr2Pipeline _generateReactivePipeline; @@ -103,7 +102,6 @@ namespace FidelityFX _depthClipPipeline = new Fsr2DepthClipPipeline(_contextDescription, _resources, _fsr2ConstantsBuffer); _lockPipeline = new Fsr2LockPipeline(_contextDescription, _resources, _fsr2ConstantsBuffer); _accumulatePipeline = new Fsr2AccumulatePipeline(_contextDescription, _resources, _fsr2ConstantsBuffer); - _accumulateSharpenPipeline = new Fsr2AccumulateSharpenPipeline(_contextDescription, _resources, _fsr2ConstantsBuffer); _rcasPipeline = new Fsr2RcasPipeline(_contextDescription, _resources, _fsr2ConstantsBuffer, _rcasConstantsBuffer); _generateReactivePipeline = new Fsr2GenerateReactivePipeline(_contextDescription, _resources, _generateReactiveConstantsBuffer); _tcrAutogeneratePipeline = new Fsr2TcrAutogeneratePipeline(_contextDescription, _resources, _fsr2ConstantsBuffer, _tcrAutogenerateConstantsBuffer); @@ -115,7 +113,6 @@ namespace FidelityFX DestroyPipeline(ref _generateReactivePipeline); DestroyPipeline(ref _computeLuminancePyramidPipeline); DestroyPipeline(ref _rcasPipeline); - DestroyPipeline(ref _accumulateSharpenPipeline); DestroyPipeline(ref _accumulatePipeline); DestroyPipeline(ref _lockPipeline); DestroyPipeline(ref _reconstructPreviousDepthPipeline); @@ -153,6 +150,10 @@ namespace FidelityFX commandBuffer.ClearRenderTarget(false, true, Color.clear); commandBuffer.SetRenderTarget(_resources.LockStatus[1]); commandBuffer.ClearRenderTarget(false, true, Color.clear); + + // Reset atomic counter to 0 + commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); + commandBuffer.ClearRenderTarget(false, true, Color.clear); } int frameIndex = _resourceFrameIndex % 2; @@ -210,10 +211,6 @@ namespace FidelityFX commandBuffer.ClearRenderTarget(false, true, new Color(-1f, 1e8f, 0f, 0f)); } - // Reset atomic counter to 0 - commandBuffer.SetRenderTarget(Fsr2ShaderIDs.UavSpdAtomicCount); - commandBuffer.ClearRenderTarget(false, true, Color.clear); - // Auto exposure SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount); @@ -241,13 +238,10 @@ namespace FidelityFX // Create locks _lockPipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY); - bool sharpenEnabled = dispatchParams.EnableSharpening; - // Accumulate - var accumulatePipeline = sharpenEnabled ? _accumulateSharpenPipeline : _accumulatePipeline; - accumulatePipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); + _accumulatePipeline.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY); - if (sharpenEnabled) + if (dispatchParams.EnableSharpening) { // Compute the constants SetupRcasConstants(dispatchParams); diff --git a/Assets/Scripts/Core/Fsr2Pipeline.cs b/Assets/Scripts/Core/Fsr2Pipeline.cs index 7db59f4..0ef1a4b 100644 --- a/Assets/Scripts/Core/Fsr2Pipeline.cs +++ b/Assets/Scripts/Core/Fsr2Pipeline.cs @@ -66,9 +66,6 @@ namespace FidelityFX // Set up shared aliasable resources, i.e. temporary render textures // These do not need to persist between frames, but they do need to be available between passes - // Resource FSR2_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE - commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavSpdAtomicCount, 1, 1, 0, default, GraphicsFormat.R32_UInt, 1, true); - // FSR2_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE commandBuffer.GetTemporaryRT(Fsr2ShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true); @@ -91,7 +88,6 @@ namespace FidelityFX public static void UnregisterResources(CommandBuffer commandBuffer) { // Release all of the aliasable resources used this frame - commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavSpdAtomicCount); commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavReconstructedPrevNearestDepth); commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavDilatedDepth); commandBuffer.ReleaseTemporaryRT(Fsr2ShaderIDs.UavLockInputLuma); @@ -175,6 +171,7 @@ namespace FidelityFX if (dispatchParams.Color.HasValue) commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvInputColor, dispatchParams.Color.Value, 0, RenderTextureSubElement.Color); + commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5); commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.UavAutoExposure, Resources.AutoExposure); @@ -277,14 +274,22 @@ namespace FidelityFX // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. protected override bool AllowFP16 => SystemInfo.graphicsDeviceVendorID != 0x10DE; + private readonly LocalKeyword _sharpeningKeyword; + public Fsr2AccumulatePipeline(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) : base(contextDescription, resources, constants) { LoadComputeShader("FSR2/ffx_fsr2_accumulate_pass"); + _sharpeningKeyword = new LocalKeyword(ComputeShader, "FFX_FSR2_OPTION_APPLY_SHARPENING"); } public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { + if (dispatchParams.EnableSharpening) + commandBuffer.EnableKeyword(ComputeShader, _sharpeningKeyword); + else + commandBuffer.DisableKeyword(ComputeShader, _sharpeningKeyword); + if ((ContextDescription.Flags & Fsr2.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0) commandBuffer.SetComputeTextureParam(ComputeShader, KernelIndex, Fsr2ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]); else if (dispatchParams.MotionVectors.HasValue) @@ -316,40 +321,6 @@ namespace FidelityFX } } - internal class Fsr2AccumulateSharpenPipeline : Fsr2AccumulatePipeline - { - private readonly ComputeShader _shaderCopy; - - public Fsr2AccumulateSharpenPipeline(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) - : base(contextDescription, resources, constants) - { - // Simply loading the accumulate_pass compute shader will give us the same instance as the non-sharpen pipeline - // So we have to clone the shader instance and set the extra keyword on the new copy - _shaderCopy = UnityEngine.Object.Instantiate(ComputeShader); - foreach (var keyword in ComputeShader.shaderKeywords) - { - _shaderCopy.EnableKeyword(keyword); - } - _shaderCopy.EnableKeyword("FFX_FSR2_OPTION_APPLY_SHARPENING"); - } - - public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) - { - // Temporarily swap around the shaders so that the dispatch will bind and execute the correct one - ComputeShader tmp = ComputeShader; - ComputeShader = _shaderCopy; - base.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); - ComputeShader = tmp; - } - - public override void Dispose() - { - // Since we instantiated this copy, we have to destroy it instead of unloading the shader resource - UnityEngine.Object.Destroy(_shaderCopy); - base.Dispose(); - } - } - internal class Fsr2RcasPipeline : Fsr2Pipeline { private readonly ComputeBuffer _rcasConstants; diff --git a/Assets/Scripts/Core/Fsr2Resources.cs b/Assets/Scripts/Core/Fsr2Resources.cs index 2ef9e79..6fcc444 100644 --- a/Assets/Scripts/Core/Fsr2Resources.cs +++ b/Assets/Scripts/Core/Fsr2Resources.cs @@ -34,6 +34,7 @@ namespace FidelityFX public Texture2D DefaultReactive; public Texture2D LanczosLut; public Texture2D MaximumBiasLut; + public RenderTexture SpdAtomicCounter; public RenderTexture AutoExposure; public RenderTexture SceneLuminance; public RenderTexture AutoReactive; @@ -83,6 +84,11 @@ namespace FidelityFX DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "FSR2_DefaultReactivityMask" }; DefaultReactive.SetPixel(0, 0, Color.clear); DefaultReactive.Apply(); + + // Resource FSR2_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE + // Despite what the original FSR2 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal. + SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "FSR2_SpdAtomicCounter", enableRandomWrite = true }; + SpdAtomicCounter.Create(); // Resource FSR2_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "FSR2_AutoExposure", enableRandomWrite = true };