diff --git a/Assets/FSR2 Assets.asset b/Assets/FSR2 Assets.asset new file mode 100644 index 0000000..e1c438e --- /dev/null +++ b/Assets/FSR2 Assets.asset @@ -0,0 +1,23 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!114 &11400000 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: db26e15a33db6ab42a38daab0ba2712f, type: 3} + m_Name: FSR2 Assets + m_EditorClassIdentifier: + shaders: + computeLuminancePyramidPass: {fileID: 7200000, guid: 04c3480675e29a340808141e68d4cc8b, type: 3} + reconstructPreviousDepthPass: {fileID: 7200000, guid: 5060dfafe45aa67459629186ceb7464e, type: 3} + depthClipPass: {fileID: 7200000, guid: b207de122e2c4b844b89dcd7c5c77c80, type: 3} + lockPass: {fileID: 7200000, guid: 20b7864a7e7258946aaf0f1996febad3, type: 3} + accumulatePass: {fileID: 7200000, guid: 7e791d69a5be98247a93b63897bc64df, type: 3} + sharpenPass: {fileID: 7200000, guid: 40815651f0f5d994cb73da9816a7ff9b, type: 3} + autoGenReactivePass: {fileID: 7200000, guid: 67ee1b32ca5e4234db9f06984c783dee, type: 3} + tcrAutoGenPass: {fileID: 7200000, guid: f8b1c27fb6a544b43b38903592240500, type: 3} diff --git a/Assets/FSR2 Assets.asset.meta b/Assets/FSR2 Assets.asset.meta new file mode 100644 index 0000000..79f0b07 --- /dev/null +++ b/Assets/FSR2 Assets.asset.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 0c417b61f173a764895d57dfcd7613bf +NativeFormatImporter: + externalObjects: {} + mainObjectFileID: 11400000 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Scripts/Fsr3UpscalerImageEffect.cs b/Assets/Scripts/Fsr3UpscalerImageEffect.cs index e419a90..7a9d927 100644 --- a/Assets/Scripts/Fsr3UpscalerImageEffect.cs +++ b/Assets/Scripts/Fsr3UpscalerImageEffect.cs @@ -43,6 +43,9 @@ namespace FidelityFX public bool performSharpenPass = true; [Tooltip("Strength of the sharpening effect.")] [Range(0, 1)] public float sharpness = 0.8f; + + [Tooltip("Adjust the influence of motion vectors on temporal accumulation.")] + [Range(0, 1)] public float velocityFactor = 1.0f; [Header("Exposure")] [Tooltip("Allow an exposure value to be computed internally. When set to false, either the provided exposure texture or a default exposure value will be used.")] @@ -333,6 +336,7 @@ namespace FidelityFX _dispatchDescription.CameraFar = _renderCamera.farClipPlane; _dispatchDescription.CameraFovAngleVertical = _renderCamera.fieldOfView * Mathf.Deg2Rad; _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity + _dispatchDescription.VelocityFactor = velocityFactor; _dispatchDescription.Reset = _resetHistory; _dispatchDescription.Flags = enableDebugView ? Fsr3Upscaler.DispatchFlags.DrawDebugView : 0; _resetHistory = false; diff --git a/Packages/com.unity.postprocessing/PostProcessing/Editor/PostProcessLayerEditor.cs b/Packages/com.unity.postprocessing/PostProcessing/Editor/PostProcessLayerEditor.cs index a2b47da..e92d105 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Editor/PostProcessLayerEditor.cs +++ b/Packages/com.unity.postprocessing/PostProcessing/Editor/PostProcessLayerEditor.cs @@ -30,9 +30,11 @@ namespace UnityEditor.Rendering.PostProcessing SerializedProperty m_FxaaFastMode; SerializedProperty m_FxaaKeepAlpha; + SerializedProperty m_UpscalerType; SerializedProperty m_FsrQualityMode; SerializedProperty m_FsrPerformSharpen; SerializedProperty m_FsrSharpness; + SerializedProperty m_FsrVelocityFactor; SerializedProperty m_FsrExposureSource; SerializedProperty m_FsrExposureTexture; SerializedProperty m_FsrPreExposure; @@ -62,7 +64,7 @@ namespace UnityEditor.Rendering.PostProcessing new GUIContent("Fast Approximate Anti-aliasing (FXAA)"), new GUIContent("Subpixel Morphological Anti-aliasing (SMAA)"), new GUIContent("Temporal Anti-aliasing (TAA)"), - new GUIContent("FidelityFX Super Resolution 3 (FSR3) Upscaler") + new GUIContent("Advanced Upscaling") }; enum ExportMode @@ -89,9 +91,11 @@ namespace UnityEditor.Rendering.PostProcessing m_FxaaFastMode = FindProperty(x => x.fastApproximateAntialiasing.fastMode); m_FxaaKeepAlpha = FindProperty(x => x.fastApproximateAntialiasing.keepAlpha); + m_UpscalerType = FindProperty(x => x.superResolution.upscalerType); m_FsrQualityMode = FindProperty(x => x.superResolution.qualityMode); m_FsrPerformSharpen = FindProperty(x => x.superResolution.performSharpenPass); m_FsrSharpness = FindProperty(x => x.superResolution.sharpness); + m_FsrVelocityFactor = FindProperty(x => x.superResolution.velocityFactor); m_FsrExposureSource = FindProperty(x => x.superResolution.exposureSource); m_FsrExposureTexture = FindProperty(x => x.superResolution.exposure); m_FsrPreExposure = FindProperty(x => x.superResolution.preExposure); @@ -222,13 +226,15 @@ namespace UnityEditor.Rendering.PostProcessing if (!m_FxaaFastMode.boolValue && EditorUtilities.isTargetingConsolesOrMobiles) EditorGUILayout.HelpBox("For performance reasons it is recommended to use Fast Mode on mobile and console platforms.", MessageType.Warning); } - else if (m_AntialiasingMode.intValue == (int)PostProcessLayer.Antialiasing.SuperResolution) + else if (m_AntialiasingMode.intValue == (int)PostProcessLayer.Antialiasing.AdvancedUpscaling) { + EditorGUILayout.PropertyField(m_UpscalerType); EditorGUILayout.PropertyField(m_FsrQualityMode); EditorGUILayout.PropertyField(m_FsrPerformSharpen); EditorGUILayout.PropertyField(m_FsrSharpness); + EditorGUILayout.PropertyField(m_FsrVelocityFactor); EditorGUILayout.PropertyField(m_FsrExposureSource); - if (m_FsrExposureSource.intValue == (int)SuperResolution.ExposureSource.Manual) EditorGUILayout.PropertyField(m_FsrExposureTexture); + if (m_FsrExposureSource.intValue == (int)Upscaling.ExposureSource.Manual) EditorGUILayout.PropertyField(m_FsrExposureTexture); EditorGUILayout.PropertyField(m_FsrPreExposure); EditorGUILayout.PropertyField(m_FsrDebugView); EditorGUILayout.PropertyField(m_FsrAutoReactive); diff --git a/Packages/com.unity.postprocessing/PostProcessing/PostProcessResources.asset b/Packages/com.unity.postprocessing/PostProcessing/PostProcessResources.asset index c9f5acb..59eb482 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/PostProcessResources.asset +++ b/Packages/com.unity.postprocessing/PostProcessing/PostProcessResources.asset @@ -127,7 +127,16 @@ MonoBehaviour: multiScaleAORender: {fileID: 7200000, guid: 34a460e8a2e66c243a9c12024e5a798d, type: 3} multiScaleAOUpsample: {fileID: 7200000, guid: 600d6212b59bb40409d19d750b5fd1e9, type: 3} gaussianDownsample: {fileID: 7200000, guid: 6dba4103d23a7904fbc49099355aff3e, type: 3} - superResolution: + fsr2Upscaler: + computeLuminancePyramidPass: {fileID: 7200000, guid: 04c3480675e29a340808141e68d4cc8b, type: 3} + reconstructPreviousDepthPass: {fileID: 7200000, guid: 5060dfafe45aa67459629186ceb7464e, type: 3} + depthClipPass: {fileID: 7200000, guid: b207de122e2c4b844b89dcd7c5c77c80, type: 3} + lockPass: {fileID: 7200000, guid: 20b7864a7e7258946aaf0f1996febad3, type: 3} + accumulatePass: {fileID: 7200000, guid: 7e791d69a5be98247a93b63897bc64df, type: 3} + sharpenPass: {fileID: 7200000, guid: 40815651f0f5d994cb73da9816a7ff9b, type: 3} + autoGenReactivePass: {fileID: 7200000, guid: 67ee1b32ca5e4234db9f06984c783dee, type: 3} + tcrAutoGenPass: {fileID: 7200000, guid: f8b1c27fb6a544b43b38903592240500, type: 3} + fsr3Upscaler: prepareInputsPass: {fileID: 7200000, guid: 4f59e5b9179d74844ae06a30ae1e0629, type: 3} lumaPyramidPass: {fileID: 7200000, guid: d253be05abcdc80428503d3e4cce3a36, type: 3} shadingChangePyramidPass: {fileID: 7200000, guid: 251e663738905fa4d8817001682d802f, type: 3} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 0ecd9f7..65b2c15 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -205,7 +205,7 @@ namespace UnityEngine.Rendering.PostProcessing } else if (context.IsSuperResolutionActive()) { - var jitter = context.superResolution.jitter; + var jitter = context.upscaling.Jitter; sheet.properties.SetVector(ShaderIDs.TaaParams, new Vector3(jitter.x, jitter.y, m_ResetHistory ? 0f : 0.85f)); } diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs deleted file mode 100644 index 516d59d..0000000 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright (c) 2024 Nico de Poel -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -using System; -using System.Collections; -using System.Collections.Generic; -using UnityEngine; -using UnityEngine.Experimental.Rendering; -using FidelityFX; -using FidelityFX.FSR3; - -namespace UnityEngine.Rendering.PostProcessing -{ - [UnityEngine.Scripting.Preserve] - [Serializable] - public class SuperResolution - { - public Func callbacksFactory { get; set; } = (context) => new Fsr3UpscalerCallbacksBase(); - - [Tooltip("Standard scaling ratio presets.")] - public Fsr3Upscaler.QualityMode qualityMode = Fsr3Upscaler.QualityMode.Quality; - - [Tooltip("Apply RCAS sharpening to the image after upscaling.")] - public bool performSharpenPass = true; - [Tooltip("Strength of the sharpening effect.")] - [Range(0, 1)] public float sharpness = 0.8f; - - [Tooltip("Choose where to get the exposure value from. Use auto-exposure from either FSR3 or Unity, provide a manual exposure texture, or use a default value.")] - public ExposureSource exposureSource = ExposureSource.Auto; - [Tooltip("Value by which the input signal will be divided, to get back to the original signal produced by the game.")] - public float preExposure = 1.0f; - [Tooltip("Optional 1x1 texture containing the exposure value for the current frame.")] - public Texture exposure = null; - - public enum ExposureSource - { - Default, - Auto, - Unity, - Manual, - } - - [Tooltip("Enable a debug view to analyze the upscaling process.")] - public bool enableDebugView = false; - - [Tooltip("Optional texture to control the influence of the current frame on the reconstructed output. If unset, either an auto-generated or a default cleared reactive mask will be used.")] - public Texture reactiveMask = null; - [Tooltip("Optional texture for marking areas of specialist rendering which should be accounted for during the upscaling process. If unset, a default cleared mask will be used.")] - public Texture transparencyAndCompositionMask = null; - [Tooltip("Automatically generate a reactive mask based on the difference between opaque-only render output and the final render output including alpha transparencies.")] - public bool autoGenerateReactiveMask = true; - [Tooltip("Parameters to control the process of auto-generating a reactive mask.")] - public GenerateReactiveParameters generateReactiveParameters = new GenerateReactiveParameters(); - - [Serializable] - public class GenerateReactiveParameters - { - [Tooltip("A value to scale the output")] - [Range(0, 2)] public float scale = 0.5f; - [Tooltip("A threshold value to generate a binary reactive mask")] - [Range(0, 1)] public float cutoffThreshold = 0.2f; - [Tooltip("A value to set for the binary reactive mask")] - [Range(0, 1)] public float binaryValue = 0.9f; - [Tooltip("Flags to determine how to generate the reactive mask")] - public Fsr3Upscaler.GenerateReactiveFlags flags = Fsr3Upscaler.GenerateReactiveFlags.ApplyTonemap | Fsr3Upscaler.GenerateReactiveFlags.ApplyThreshold | Fsr3Upscaler.GenerateReactiveFlags.UseComponentsMax; - } - - [Tooltip("(Experimental) Automatically generate and use Reactive mask and Transparency & composition mask internally.")] - public bool autoGenerateTransparencyAndComposition = false; - [Tooltip("Parameters to control the process of auto-generating transparency and composition masks.")] - public GenerateTcrParameters generateTransparencyAndCompositionParameters = new GenerateTcrParameters(); - - [Serializable] - public class GenerateTcrParameters - { - [Tooltip("Setting this value too small will cause visual instability. Larger values can cause ghosting.")] - [Range(0, 1)] public float autoTcThreshold = 0.05f; - [Tooltip("Smaller values will increase stability at hard edges of translucent objects.")] - [Range(0, 2)] public float autoTcScale = 1.0f; - [Tooltip("Larger values result in more reactive pixels.")] - [Range(0, 10)] public float autoReactiveScale = 5.0f; - [Tooltip("Maximum value reactivity can reach.")] - [Range(0, 1)] public float autoReactiveMax = 0.9f; - } - - public Vector2 jitter { get; private set; } - public Vector2Int renderSize => _maxRenderSize; - public Vector2Int displaySize => _displaySize; - public RenderTargetIdentifier colorOpaqueOnly { get; set; } - - private Fsr3UpscalerContext _fsrContext; - private Vector2Int _maxRenderSize; - private Vector2Int _displaySize; - private bool _resetHistory; - - private IFsr3UpscalerCallbacks _callbacks; - - private readonly Fsr3Upscaler.DispatchDescription _dispatchDescription = new Fsr3Upscaler.DispatchDescription(); - private readonly Fsr3Upscaler.GenerateReactiveDescription _genReactiveDescription = new Fsr3Upscaler.GenerateReactiveDescription(); - - private Fsr3Upscaler.QualityMode _prevQualityMode; - private ExposureSource _prevExposureSource; - private Vector2Int _prevDisplaySize; - - private Rect _originalRect; - - public bool IsSupported() - { - return SystemInfo.supportsComputeShaders && SystemInfo.supportsMotionVectors; - } - - public DepthTextureMode GetCameraFlags() - { - return DepthTextureMode.Depth | DepthTextureMode.MotionVectors; - } - - public void Release() - { - DestroyFsrContext(); - } - - public void ResetHistory() - { - _resetHistory = true; - } - - public void ConfigureJitteredProjectionMatrix(PostProcessRenderContext context) - { - ApplyJitter(context.camera); - } - - public void ConfigureCameraViewport(PostProcessRenderContext context) - { - var camera = context.camera; - _originalRect = camera.rect; - - // Determine the desired rendering and display resolutions - _displaySize = new Vector2Int(camera.pixelWidth, camera.pixelHeight); - Fsr3Upscaler.GetRenderResolutionFromQualityMode(out int maxRenderWidth, out int maxRenderHeight, _displaySize.x, _displaySize.y, qualityMode); - _maxRenderSize = new Vector2Int(maxRenderWidth, maxRenderHeight); - - // Render to a smaller portion of the screen by manipulating the camera's viewport rect - camera.aspect = (float)_displaySize.x / _displaySize.y; - camera.rect = new Rect(0, 0, _originalRect.width * _maxRenderSize.x / _displaySize.x, _originalRect.height * _maxRenderSize.y / _displaySize.y); - } - - public void ResetCameraViewport(PostProcessRenderContext context) - { - context.camera.rect = _originalRect; - } - - public void Render(PostProcessRenderContext context) - { - var cmd = context.command; - cmd.BeginSample("FSR3 Upscaler"); - - // Monitor for any resolution changes and recreate the FSR3 Upscaler context if necessary - // We can't create an FSR3 Upscaler context without info from the post-processing context, so delay the initial setup until here - if (_fsrContext == null || _displaySize.x != _prevDisplaySize.x || _displaySize.y != _prevDisplaySize.y || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource) - { - DestroyFsrContext(); - CreateFsrContext(context); - } - - SetupDispatchDescription(context); - - if (autoGenerateReactiveMask) - { - SetupAutoReactiveDescription(context); - - var scaledRenderSize = _genReactiveDescription.RenderSize; - cmd.GetTemporaryRT(Fsr3ShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); - _fsrContext.GenerateReactiveMask(_genReactiveDescription, cmd); - _dispatchDescription.Reactive = new ResourceView(Fsr3ShaderIDs.UavAutoReactive); - } - - _fsrContext.Dispatch(_dispatchDescription, cmd); - - cmd.EndSample("FSR3 Upscaler"); - - _resetHistory = false; - } - - private void CreateFsrContext(PostProcessRenderContext context) - { - _prevQualityMode = qualityMode; - _prevExposureSource = exposureSource; - _prevDisplaySize = _displaySize; - - // Initialize FSR3 Upscaler context - Fsr3Upscaler.InitializationFlags flags = 0; - if (context.camera.allowHDR) flags |= Fsr3Upscaler.InitializationFlags.EnableHighDynamicRange; - if (exposureSource == ExposureSource.Auto) flags |= Fsr3Upscaler.InitializationFlags.EnableAutoExposure; - if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Fsr3Upscaler.InitializationFlags.EnableDynamicResolution; - - _callbacks = callbacksFactory(context); - _fsrContext = Fsr3Upscaler.CreateContext(_displaySize, _maxRenderSize, context.resources.computeShaders.superResolution, flags); - - // Apply a mipmap bias so that textures retain their sharpness - float biasOffset = Fsr3Upscaler.GetMipmapBiasOffset(_maxRenderSize.x, _displaySize.x); - if (!float.IsNaN(biasOffset) && !float.IsInfinity(biasOffset)) - { - _callbacks.ApplyMipmapBias(biasOffset); - } - } - - private void DestroyFsrContext() - { - if (_fsrContext != null) - { - _fsrContext.Destroy(); - _fsrContext = null; - } - - if (_callbacks != null) - { - // Undo the current mipmap bias offset - _callbacks.UndoMipmapBias(); - _callbacks = null; - } - } - - private void ApplyJitter(Camera camera) - { - var scaledRenderSize = GetScaledRenderSize(camera); - - // Perform custom jittering of the camera's projection matrix according to FSR3's recipe - int jitterPhaseCount = Fsr3Upscaler.GetJitterPhaseCount(scaledRenderSize.x, _displaySize.x); - Fsr3Upscaler.GetJitterOffset(out float jitterX, out float jitterY, Time.frameCount, jitterPhaseCount); - - _dispatchDescription.JitterOffset = new Vector2(jitterX, jitterY); - - jitterX = 2.0f * jitterX / scaledRenderSize.x; - jitterY = 2.0f * jitterY / scaledRenderSize.y; - - var jitterTranslationMatrix = Matrix4x4.Translate(new Vector3(jitterX, jitterY, 0)); - camera.nonJitteredProjectionMatrix = camera.projectionMatrix; - camera.projectionMatrix = jitterTranslationMatrix * camera.nonJitteredProjectionMatrix; - camera.useJitteredProjectionMatrixForTransparentRendering = true; - - jitter = new Vector2(jitterX, jitterY); - } - - private void SetupDispatchDescription(PostProcessRenderContext context) - { - var camera = context.camera; - - // Set up the main FSR3 Upscaler dispatch parameters - _dispatchDescription.Color = new ResourceView(context.source); - _dispatchDescription.Depth = new ResourceView(GetDepthTexture(context.camera), RenderTextureSubElement.Depth); - _dispatchDescription.MotionVectors = new ResourceView(BuiltinRenderTextureType.MotionVectors); - _dispatchDescription.Exposure = ResourceView.Unassigned; - _dispatchDescription.Reactive = ResourceView.Unassigned; - _dispatchDescription.TransparencyAndComposition = ResourceView.Unassigned; - - if (exposureSource == ExposureSource.Manual && exposure != null) _dispatchDescription.Exposure = new ResourceView(exposure); - if (exposureSource == ExposureSource.Unity) _dispatchDescription.Exposure = new ResourceView(context.autoExposureTexture); - if (reactiveMask != null) _dispatchDescription.Reactive = new ResourceView(reactiveMask); - if (transparencyAndCompositionMask != null) _dispatchDescription.TransparencyAndComposition = new ResourceView(transparencyAndCompositionMask); - - var scaledRenderSize = GetScaledRenderSize(context.camera); - - _dispatchDescription.Output = new ResourceView(context.destination); - _dispatchDescription.PreExposure = preExposure; - _dispatchDescription.EnableSharpening = performSharpenPass; - _dispatchDescription.Sharpness = sharpness; - _dispatchDescription.MotionVectorScale.x = -scaledRenderSize.x; - _dispatchDescription.MotionVectorScale.y = -scaledRenderSize.y; - _dispatchDescription.RenderSize = scaledRenderSize; - _dispatchDescription.UpscaleSize = _displaySize; - _dispatchDescription.FrameTimeDelta = Time.unscaledDeltaTime; - _dispatchDescription.CameraNear = camera.nearClipPlane; - _dispatchDescription.CameraFar = camera.farClipPlane; - _dispatchDescription.CameraFovAngleVertical = camera.fieldOfView * Mathf.Deg2Rad; - _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity - _dispatchDescription.Reset = _resetHistory; - _dispatchDescription.Flags = enableDebugView ? Fsr3Upscaler.DispatchFlags.DrawDebugView : 0; - - // Set up the parameters for the optional experimental auto-TCR feature - _dispatchDescription.EnableAutoReactive = autoGenerateTransparencyAndComposition; - if (autoGenerateTransparencyAndComposition) - { - _dispatchDescription.ColorOpaqueOnly = new ResourceView(colorOpaqueOnly); - _dispatchDescription.AutoTcThreshold = generateTransparencyAndCompositionParameters.autoTcThreshold; - _dispatchDescription.AutoTcScale = generateTransparencyAndCompositionParameters.autoTcScale; - _dispatchDescription.AutoReactiveScale = generateTransparencyAndCompositionParameters.autoReactiveScale; - _dispatchDescription.AutoReactiveMax = generateTransparencyAndCompositionParameters.autoReactiveMax; - } - - if (SystemInfo.usesReversedZBuffer) - { - // Swap the near and far clip plane distances as FSR3 expects this when using inverted depth - (_dispatchDescription.CameraNear, _dispatchDescription.CameraFar) = (_dispatchDescription.CameraFar, _dispatchDescription.CameraNear); - } - } - - private void SetupAutoReactiveDescription(PostProcessRenderContext context) - { - // Set up the parameters to auto-generate a reactive mask - _genReactiveDescription.ColorOpaqueOnly = new ResourceView(colorOpaqueOnly); - _genReactiveDescription.ColorPreUpscale = new ResourceView(context.source); - _genReactiveDescription.OutReactive = new ResourceView(Fsr3ShaderIDs.UavAutoReactive); - _genReactiveDescription.RenderSize = GetScaledRenderSize(context.camera); - _genReactiveDescription.Scale = generateReactiveParameters.scale; - _genReactiveDescription.CutoffThreshold = generateReactiveParameters.cutoffThreshold; - _genReactiveDescription.BinaryValue = generateReactiveParameters.binaryValue; - _genReactiveDescription.Flags = generateReactiveParameters.flags; - } - - internal Vector2Int GetScaledRenderSize(Camera camera) - { - if (!RuntimeUtilities.IsDynamicResolutionEnabled(camera)) - return _maxRenderSize; - - return new Vector2Int(Mathf.CeilToInt(_maxRenderSize.x * ScalableBufferManager.widthScaleFactor), Mathf.CeilToInt(_maxRenderSize.y * ScalableBufferManager.heightScaleFactor)); - } - - private static BuiltinRenderTextureType GetDepthTexture(Camera cam) - { - RenderingPath renderingPath = cam.renderingPath; - return renderingPath == RenderingPath.Forward || renderingPath == RenderingPath.VertexLit ? BuiltinRenderTextureType.Depth : BuiltinRenderTextureType.CameraTarget; - } - } -} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs new file mode 100644 index 0000000..1a6d5a1 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs @@ -0,0 +1,312 @@ +using System; +using System.Runtime.InteropServices; +using FidelityFX.FSR2; +using UnityEngine.Experimental.Rendering; + +namespace UnityEngine.Rendering.PostProcessing +{ + [Scripting.Preserve] + [Serializable] + public class Upscaling + { + public Func callbacksFactory { get; set; } = (context) => new UpscalerCallbacksBase(); + + public enum UpscalerType + { + [InspectorName("FidelityFX Super Resolution 2.3 (FSR2)")] FSR2, + [InspectorName("FidelityFX Super Resolution 3.1 (FSR3)")] FSR3, + } + + [Tooltip("Which upscaling technology to use.")] + public UpscalerType upscalerType = UpscalerType.FSR2; + + [Tooltip("Standard scaling ratio presets.")] + public Fsr2.QualityMode qualityMode = Fsr2.QualityMode.Quality; + + [Tooltip("Apply sharpening to the image after upscaling.")] + public bool performSharpenPass = true; + [Tooltip("Strength of the sharpening effect.")] + [Range(0, 1)] public float sharpness = 0.8f; + + [Tooltip("Adjust the influence of motion vectors on temporal accumulation.")] + [Range(0, 1)] public float velocityFactor = 1.0f; + + [Tooltip("Choose where to get the exposure value from. Use auto-exposure from either the upscaler or Unity, provide a manual exposure texture, or use a default value.")] + public ExposureSource exposureSource = ExposureSource.Auto; + [Tooltip("Value by which the input signal will be divided, to get back to the original signal produced by the game.")] + public float preExposure = 1.0f; + [Tooltip("Optional 1x1 texture containing the exposure value for the current frame.")] + public Texture exposure = null; + + public enum ExposureSource + { + Default, + Auto, + Unity, + Manual, + } + + [Tooltip("Enable a debug view to analyze the upscaling process.")] + public bool enableDebugView = false; + + [Tooltip("Optional texture to control the influence of the current frame on the reconstructed output. If unset, either an auto-generated or a default cleared reactive mask will be used.")] + public Texture reactiveMask = null; + [Tooltip("Optional texture for marking areas of specialist rendering which should be accounted for during the upscaling process. If unset, a default cleared mask will be used.")] + public Texture transparencyAndCompositionMask = null; + [Tooltip("Automatically generate a reactive mask based on the difference between opaque-only render output and the final render output including alpha transparencies.")] + public bool autoGenerateReactiveMask = true; + [Tooltip("Parameters to control the process of auto-generating a reactive mask.")] + public GenerateReactiveParameters generateReactiveParameters = new GenerateReactiveParameters(); + + [Serializable] + public class GenerateReactiveParameters + { + [Tooltip("A value to scale the output")] + [Range(0, 2)] public float scale = 0.5f; + [Tooltip("A threshold value to generate a binary reactive mask")] + [Range(0, 1)] public float cutoffThreshold = 0.2f; + [Tooltip("A value to set for the binary reactive mask")] + [Range(0, 1)] public float binaryValue = 0.9f; + [Tooltip("Flags to determine how to generate the reactive mask")] + public Fsr2.GenerateReactiveFlags flags = Fsr2.GenerateReactiveFlags.ApplyTonemap | Fsr2.GenerateReactiveFlags.ApplyThreshold | Fsr2.GenerateReactiveFlags.UseComponentsMax; + } + + [Tooltip("(Experimental) Automatically generate and use Reactive mask and Transparency & composition mask internally.")] + public bool autoGenerateTransparencyAndComposition = false; + [Tooltip("Parameters to control the process of auto-generating transparency and composition masks.")] + public GenerateTcrParameters generateTransparencyAndCompositionParameters = new GenerateTcrParameters(); + + [Serializable] + public class GenerateTcrParameters + { + [Tooltip("Setting this value too small will cause visual instability. Larger values can cause ghosting.")] + [Range(0, 1)] public float autoTcThreshold = 0.05f; + [Tooltip("Smaller values will increase stability at hard edges of translucent objects.")] + [Range(0, 2)] public float autoTcScale = 1.0f; + [Tooltip("Larger values result in more reactive pixels.")] + [Range(0, 10)] public float autoReactiveScale = 5.0f; + [Tooltip("Maximum value reactivity can reach.")] + [Range(0, 1)] public float autoReactiveMax = 0.9f; + } + + public Vector2 Jitter { get; private set; } + public Vector2 JitterOffset { get; private set; } + public Vector2Int MaxRenderSize => _maxRenderSize; + public Vector2Int UpscaleSize => _upscaleSize; + public bool Reset => _resetHistory; + public RenderTargetIdentifier ColorOpaqueOnly { get; set; } + + private bool _initialized; + private Upscaler _upscaler; + private Vector2Int _maxRenderSize; + private Vector2Int _upscaleSize; + private bool _resetHistory; + + private IUpscalerCallbacks _callbacks; + + private UpscalerType _prevUpscalerType; + private Fsr2.QualityMode _prevQualityMode; + private ExposureSource _prevExposureSource; + private Vector2Int _prevUpscaleSize; + + private Rect _originalRect; + + public bool IsSupported() + { + return SystemInfo.supportsComputeShaders && SystemInfo.supportsMotionVectors; + } + + public DepthTextureMode GetCameraFlags() + { + return DepthTextureMode.Depth | DepthTextureMode.MotionVectors; + } + + public void Release() + { + DestroyUpscaler(); + } + + public void ResetHistory() + { + _resetHistory = true; + } + + public void ConfigureJitteredProjectionMatrix(PostProcessRenderContext context) + { + ApplyJitter(context.camera); + } + + public void ConfigureCameraViewport(PostProcessRenderContext context) + { + var camera = context.camera; + _originalRect = camera.rect; + + // Determine the desired rendering and display resolutions + _upscaleSize = new Vector2Int(camera.pixelWidth, camera.pixelHeight); + Fsr2.GetRenderResolutionFromQualityMode(out int maxRenderWidth, out int maxRenderHeight, _upscaleSize.x, _upscaleSize.y, qualityMode); + _maxRenderSize = new Vector2Int(maxRenderWidth, maxRenderHeight); + + // Render to a smaller portion of the screen by manipulating the camera's viewport rect + camera.aspect = (float)_upscaleSize.x / _upscaleSize.y; + camera.rect = new Rect(0, 0, _originalRect.width * _maxRenderSize.x / _upscaleSize.x, _originalRect.height * _maxRenderSize.y / _upscaleSize.y); + } + + public void ResetCameraViewport(PostProcessRenderContext context) + { + context.camera.rect = _originalRect; + } + + public void Render(PostProcessRenderContext context) + { + // Monitor for any resolution changes and recreate the upscaler context if necessary + // We can't create an upscaler context without info from the post-processing context, so delay the initial setup until here + if (!_initialized || _upscaler == null || _upscaleSize.x != _prevUpscaleSize.x || _upscaleSize.y != _prevUpscaleSize.y || + upscalerType != _prevUpscalerType || qualityMode != _prevQualityMode || exposureSource != _prevExposureSource) + { + DestroyUpscaler(); + CreateUpscaler(context); + } + + _upscaler?.Render(context, this); + + _resetHistory = false; + } + + private void CreateUpscaler(PostProcessRenderContext context) + { + if (_upscaler == null || upscalerType != _prevUpscalerType) + { + _upscaler = upscalerType switch + { + UpscalerType.FSR2 when FSR2Upscaler.IsSupported => new FSR2Upscaler(), + UpscalerType.FSR3 when FSR3Upscaler.IsSupported => new FSR3Upscaler(), + _ => new FSR2Upscaler(), // Fallback for when the selected upscaler is not supported on the current hardware + }; + + _prevUpscalerType = upscalerType; + } + + _prevQualityMode = qualityMode; + _prevExposureSource = exposureSource; + _prevUpscaleSize = _upscaleSize; + + _callbacks = callbacksFactory(context); + + _upscaler.CreateContext(context, this); + + // Apply a mipmap bias so that textures retain their sharpness + float biasOffset = Fsr2.GetMipmapBiasOffset(_maxRenderSize.x, _upscaleSize.x); + if (_callbacks != null && !float.IsNaN(biasOffset) && !float.IsInfinity(biasOffset)) + { + _callbacks.ApplyMipmapBias(biasOffset); + } + + _initialized = true; + } + + private void DestroyUpscaler() + { + _initialized = false; + + _upscaler?.DestroyContext(); + + if (_callbacks != null) + { + // Undo the current mipmap bias offset + _callbacks.UndoMipmapBias(); + _callbacks = null; + } + } + + private void ApplyJitter(Camera camera) + { + var scaledRenderSize = GetScaledRenderSize(camera); + + // Perform custom jittering of the camera's projection matrix according to FSR's recipe + int jitterPhaseCount = Fsr2.GetJitterPhaseCount(scaledRenderSize.x, _upscaleSize.x); + Fsr2.GetJitterOffset(out float jitterX, out float jitterY, Time.frameCount, jitterPhaseCount); + + JitterOffset = new Vector2(jitterX, jitterY); + + jitterX = 2.0f * jitterX / scaledRenderSize.x; + jitterY = 2.0f * jitterY / scaledRenderSize.y; + + var jitterTranslationMatrix = Matrix4x4.Translate(new Vector3(jitterX, jitterY, 0)); + camera.nonJitteredProjectionMatrix = camera.projectionMatrix; + camera.projectionMatrix = jitterTranslationMatrix * camera.nonJitteredProjectionMatrix; + camera.useJitteredProjectionMatrixForTransparentRendering = true; + + Jitter = new Vector2(jitterX, jitterY); + } + + internal Vector2Int GetScaledRenderSize(Camera camera) + { + if (!RuntimeUtilities.IsDynamicResolutionEnabled(camera)) + return _maxRenderSize; + + return new Vector2Int(Mathf.CeilToInt(_maxRenderSize.x * ScalableBufferManager.widthScaleFactor), Mathf.CeilToInt(_maxRenderSize.y * ScalableBufferManager.heightScaleFactor)); + } + + internal static BuiltinRenderTextureType GetDepthTexture(Camera cam) + { + return cam.renderingPath is RenderingPath.Forward or RenderingPath.VertexLit ? BuiltinRenderTextureType.Depth : BuiltinRenderTextureType.CameraTarget; + } + } + + /// + /// A collection of callbacks required by the upscaler. + /// This allows some customization by the game dev on how to integrate upscaling into their own game setup. + /// + public interface IUpscalerCallbacks + { + /// + /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers. + /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures. + /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays. + /// + /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. + /// You may also want to store the bias offset value and apply it to any assets that are loaded in on demand. + /// + void ApplyMipmapBias(float biasOffset); + + void UndoMipmapBias(); + } + + /// + /// Default implementation of IUpscalerCallbacks. + /// These are fine for testing but a proper game will want to extend and override these methods. + /// + public class UpscalerCallbacksBase: IUpscalerCallbacks + { + protected float CurrentBiasOffset = 0; + + public virtual void ApplyMipmapBias(float biasOffset) + { + if (float.IsNaN(biasOffset) || float.IsInfinity(biasOffset)) + return; + + CurrentBiasOffset += biasOffset; + + if (Mathf.Approximately(CurrentBiasOffset, 0f)) + { + CurrentBiasOffset = 0f; + } + + foreach (var texture in Resources.FindObjectsOfTypeAll()) + { + if (texture.mipmapCount <= 1) + continue; + + texture.mipMapBias += biasOffset; + } + } + + public virtual void UndoMipmapBias() + { + if (CurrentBiasOffset == 0f) + return; + + ApplyMipmapBias(-CurrentBiasOffset); + } + } +} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs.meta b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs.meta new file mode 100644 index 0000000..bf05d49 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: bd735f2dacb54f5e9a0722ec5851acee +timeCreated: 1729512005 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.meta b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.meta new file mode 100644 index 0000000..18a8f32 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: a0c49999ca0d45bfb0922a4f7366ae28 +timeCreated: 1729511667 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs new file mode 100644 index 0000000..a34d831 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs @@ -0,0 +1,124 @@ +using FidelityFX; +using FidelityFX.FSR2; +using UnityEngine.Experimental.Rendering; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal class FSR2Upscaler: Upscaler + { + public static bool IsSupported => SystemInfo.supportsComputeShaders; + + private Fsr2Context _fsrContext; + + private readonly Fsr2.DispatchDescription _dispatchDescription = new(); + private readonly Fsr2.GenerateReactiveDescription _genReactiveDescription = new(); + + public override void CreateContext(PostProcessRenderContext context, Upscaling config) + { + // Initialize FSR2 context + Fsr2.InitializationFlags flags = 0; + if (context.camera.allowHDR) flags |= Fsr2.InitializationFlags.EnableHighDynamicRange; + if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Fsr2.InitializationFlags.EnableAutoExposure; + if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Fsr2.InitializationFlags.EnableDynamicResolution; + + _fsrContext = Fsr2.CreateContext(config.UpscaleSize, config.MaxRenderSize, context.resources.computeShaders.fsr2Upscaler, flags); + } + + public override void DestroyContext() + { + if (_fsrContext != null) + { + _fsrContext.Destroy(); + _fsrContext = null; + } + } + + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample("FSR2"); + + SetupDispatchDescription(context, config); + + if (config.autoGenerateReactiveMask) + { + SetupAutoReactiveDescription(context, config); + + var scaledRenderSize = _genReactiveDescription.RenderSize; + cmd.GetTemporaryRT(Fsr2ShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + _fsrContext.GenerateReactiveMask(_genReactiveDescription, cmd); + _dispatchDescription.Reactive = new ResourceView(Fsr2ShaderIDs.UavAutoReactive); + } + + _fsrContext.Dispatch(_dispatchDescription, cmd); + + cmd.EndSample("FSR2"); + } + + private void SetupDispatchDescription(PostProcessRenderContext context, Upscaling config) + { + var camera = context.camera; + + // Set up the main FSR2 dispatch parameters + _dispatchDescription.Color = new ResourceView(context.source); + _dispatchDescription.Depth = new ResourceView(Upscaling.GetDepthTexture(context.camera), RenderTextureSubElement.Depth); + _dispatchDescription.MotionVectors = new ResourceView(BuiltinRenderTextureType.MotionVectors); + _dispatchDescription.Exposure = ResourceView.Unassigned; + _dispatchDescription.Reactive = ResourceView.Unassigned; + _dispatchDescription.TransparencyAndComposition = ResourceView.Unassigned; + + if (config.exposureSource == Upscaling.ExposureSource.Manual && config.exposure != null) _dispatchDescription.Exposure = new ResourceView(config.exposure); + if (config.exposureSource == Upscaling.ExposureSource.Unity) _dispatchDescription.Exposure = new ResourceView(context.autoExposureTexture); + if (config.reactiveMask != null) _dispatchDescription.Reactive = new ResourceView(config.reactiveMask); + if (config.transparencyAndCompositionMask != null) _dispatchDescription.TransparencyAndComposition = new ResourceView(config.transparencyAndCompositionMask); + + var scaledRenderSize = config.GetScaledRenderSize(context.camera); + + _dispatchDescription.Output = new ResourceView(context.destination); + _dispatchDescription.PreExposure = config.preExposure; + _dispatchDescription.EnableSharpening = config.performSharpenPass; + _dispatchDescription.Sharpness = config.sharpness; + _dispatchDescription.JitterOffset = config.JitterOffset; + _dispatchDescription.MotionVectorScale.x = -scaledRenderSize.x; + _dispatchDescription.MotionVectorScale.y = -scaledRenderSize.y; + _dispatchDescription.RenderSize = scaledRenderSize; + _dispatchDescription.InputResourceSize = scaledRenderSize; + _dispatchDescription.FrameTimeDelta = Time.unscaledDeltaTime; + _dispatchDescription.CameraNear = camera.nearClipPlane; + _dispatchDescription.CameraFar = camera.farClipPlane; + _dispatchDescription.CameraFovAngleVertical = camera.fieldOfView * Mathf.Deg2Rad; + _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity + _dispatchDescription.Reset = config.Reset; + + // Set up the parameters for the optional experimental auto-TCR feature + _dispatchDescription.EnableAutoReactive = config.autoGenerateTransparencyAndComposition; + if (config.autoGenerateTransparencyAndComposition) + { + _dispatchDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly); + _dispatchDescription.AutoTcThreshold = config.generateTransparencyAndCompositionParameters.autoTcThreshold; + _dispatchDescription.AutoTcScale = config.generateTransparencyAndCompositionParameters.autoTcScale; + _dispatchDescription.AutoReactiveScale = config.generateTransparencyAndCompositionParameters.autoReactiveScale; + _dispatchDescription.AutoReactiveMax = config.generateTransparencyAndCompositionParameters.autoReactiveMax; + } + + if (SystemInfo.usesReversedZBuffer) + { + // Swap the near and far clip plane distances as FSR2 expects this when using inverted depth + (_dispatchDescription.CameraNear, _dispatchDescription.CameraFar) = (_dispatchDescription.CameraFar, _dispatchDescription.CameraNear); + } + } + + private void SetupAutoReactiveDescription(PostProcessRenderContext context, Upscaling config) + { + // Set up the parameters to auto-generate a reactive mask + _genReactiveDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly); + _genReactiveDescription.ColorPreUpscale = new ResourceView(context.source); + _genReactiveDescription.OutReactive = new ResourceView(Fsr2ShaderIDs.UavAutoReactive); + _genReactiveDescription.RenderSize = config.GetScaledRenderSize(context.camera); + _genReactiveDescription.Scale = config.generateReactiveParameters.scale; + _genReactiveDescription.CutoffThreshold = config.generateReactiveParameters.cutoffThreshold; + _genReactiveDescription.BinaryValue = config.generateReactiveParameters.binaryValue; + _genReactiveDescription.Flags = config.generateReactiveParameters.flags; + } + } +} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs.meta b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs.meta similarity index 83% rename from Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs.meta rename to Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs.meta index 979fe66..949c9da 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/SuperResolution.cs.meta +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR2Upscaler.cs.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: 804fb4cfea0948247a52576cc4a79609 +guid: a0118021cf3d4485b761155cbdcaa35d MonoImporter: externalObjects: {} serializedVersion: 2 diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs new file mode 100644 index 0000000..65e3513 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs @@ -0,0 +1,125 @@ +using FidelityFX; +using FidelityFX.FSR3; +using UnityEngine.Experimental.Rendering; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal class FSR3Upscaler: Upscaler + { + public static bool IsSupported => SystemInfo.supportsComputeShaders; + + private Fsr3UpscalerContext _fsrContext; + + private readonly Fsr3Upscaler.DispatchDescription _dispatchDescription = new(); + private readonly Fsr3Upscaler.GenerateReactiveDescription _genReactiveDescription = new(); + + public override void CreateContext(PostProcessRenderContext context, Upscaling config) + { + // Initialize FSR3 Upscaler context + Fsr3Upscaler.InitializationFlags flags = 0; + if (context.camera.allowHDR) flags |= Fsr3Upscaler.InitializationFlags.EnableHighDynamicRange; + if (config.exposureSource == Upscaling.ExposureSource.Auto) flags |= Fsr3Upscaler.InitializationFlags.EnableAutoExposure; + if (RuntimeUtilities.IsDynamicResolutionEnabled(context.camera)) flags |= Fsr3Upscaler.InitializationFlags.EnableDynamicResolution; + + _fsrContext = Fsr3Upscaler.CreateContext(config.UpscaleSize, config.MaxRenderSize, context.resources.computeShaders.fsr3Upscaler, flags); + } + + public override void DestroyContext() + { + if (_fsrContext != null) + { + _fsrContext.Destroy(); + _fsrContext = null; + } + } + + public override void Render(PostProcessRenderContext context, Upscaling config) + { + var cmd = context.command; + cmd.BeginSample("FSR3 Upscaler"); + + SetupDispatchDescription(context, config); + + if (config.autoGenerateReactiveMask) + { + SetupAutoReactiveDescription(context, config); + + var scaledRenderSize = _genReactiveDescription.RenderSize; + cmd.GetTemporaryRT(Fsr3ShaderIDs.UavAutoReactive, scaledRenderSize.x, scaledRenderSize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true); + _fsrContext.GenerateReactiveMask(_genReactiveDescription, cmd); + _dispatchDescription.Reactive = new ResourceView(Fsr3ShaderIDs.UavAutoReactive); + } + + _fsrContext.Dispatch(_dispatchDescription, cmd); + + cmd.EndSample("FSR3 Upscaler"); + } + + private void SetupDispatchDescription(PostProcessRenderContext context, Upscaling config) + { + var camera = context.camera; + + // Set up the main FSR3 Upscaler dispatch parameters + _dispatchDescription.Color = new ResourceView(context.source); + _dispatchDescription.Depth = new ResourceView(Upscaling.GetDepthTexture(context.camera), RenderTextureSubElement.Depth); + _dispatchDescription.MotionVectors = new ResourceView(BuiltinRenderTextureType.MotionVectors); + _dispatchDescription.Exposure = ResourceView.Unassigned; + _dispatchDescription.Reactive = ResourceView.Unassigned; + _dispatchDescription.TransparencyAndComposition = ResourceView.Unassigned; + + if (config.exposureSource == Upscaling.ExposureSource.Manual && config.exposure != null) _dispatchDescription.Exposure = new ResourceView(config.exposure); + if (config.exposureSource == Upscaling.ExposureSource.Unity) _dispatchDescription.Exposure = new ResourceView(context.autoExposureTexture); + if (config.reactiveMask != null) _dispatchDescription.Reactive = new ResourceView(config.reactiveMask); + if (config.transparencyAndCompositionMask != null) _dispatchDescription.TransparencyAndComposition = new ResourceView(config.transparencyAndCompositionMask); + + var scaledRenderSize = config.GetScaledRenderSize(context.camera); + + _dispatchDescription.Output = new ResourceView(context.destination); + _dispatchDescription.PreExposure = config.preExposure; + _dispatchDescription.EnableSharpening = config.performSharpenPass; + _dispatchDescription.Sharpness = config.sharpness; + _dispatchDescription.JitterOffset = config.JitterOffset; + _dispatchDescription.MotionVectorScale.x = -scaledRenderSize.x; + _dispatchDescription.MotionVectorScale.y = -scaledRenderSize.y; + _dispatchDescription.RenderSize = scaledRenderSize; + _dispatchDescription.UpscaleSize = config.UpscaleSize; + _dispatchDescription.FrameTimeDelta = Time.unscaledDeltaTime; + _dispatchDescription.CameraNear = camera.nearClipPlane; + _dispatchDescription.CameraFar = camera.farClipPlane; + _dispatchDescription.CameraFovAngleVertical = camera.fieldOfView * Mathf.Deg2Rad; + _dispatchDescription.ViewSpaceToMetersFactor = 1.0f; // 1 unit is 1 meter in Unity + _dispatchDescription.VelocityFactor = config.velocityFactor; + _dispatchDescription.Reset = config.Reset; + + // Set up the parameters for the optional experimental auto-TCR feature + _dispatchDescription.EnableAutoReactive = config.autoGenerateTransparencyAndComposition; + if (config.autoGenerateTransparencyAndComposition) + { + _dispatchDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly); + _dispatchDescription.AutoTcThreshold = config.generateTransparencyAndCompositionParameters.autoTcThreshold; + _dispatchDescription.AutoTcScale = config.generateTransparencyAndCompositionParameters.autoTcScale; + _dispatchDescription.AutoReactiveScale = config.generateTransparencyAndCompositionParameters.autoReactiveScale; + _dispatchDescription.AutoReactiveMax = config.generateTransparencyAndCompositionParameters.autoReactiveMax; + } + + if (SystemInfo.usesReversedZBuffer) + { + // Swap the near and far clip plane distances as FSR3 Upscaler expects this when using inverted depth + (_dispatchDescription.CameraNear, _dispatchDescription.CameraFar) = (_dispatchDescription.CameraFar, _dispatchDescription.CameraNear); + } + } + + private void SetupAutoReactiveDescription(PostProcessRenderContext context, Upscaling config) + { + // Set up the parameters to auto-generate a reactive mask + _genReactiveDescription.ColorOpaqueOnly = new ResourceView(config.ColorOpaqueOnly); + _genReactiveDescription.ColorPreUpscale = new ResourceView(context.source); + _genReactiveDescription.OutReactive = new ResourceView(Fsr3ShaderIDs.UavAutoReactive); + _genReactiveDescription.RenderSize = config.GetScaledRenderSize(context.camera); + _genReactiveDescription.Scale = config.generateReactiveParameters.scale; + _genReactiveDescription.CutoffThreshold = config.generateReactiveParameters.cutoffThreshold; + _genReactiveDescription.BinaryValue = config.generateReactiveParameters.binaryValue; + _genReactiveDescription.Flags = (Fsr3Upscaler.GenerateReactiveFlags)config.generateReactiveParameters.flags; + } + } +} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs.meta b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs.meta new file mode 100644 index 0000000..9e7728a --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/FSR3Upscaler.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: fbdc9b27493f9ee43aa512e20a9ce7bf +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs new file mode 100644 index 0000000..4fa4498 --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs @@ -0,0 +1,13 @@ +using System; + +namespace UnityEngine.Rendering.PostProcessing +{ + internal abstract class Upscaler + { + public abstract void CreateContext(PostProcessRenderContext context, Upscaling config); + + public abstract void DestroyContext(); + + public abstract void Render(PostProcessRenderContext context, Upscaling config); + } +} diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs.meta b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs.meta new file mode 100644 index 0000000..712bf4c --- /dev/null +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/Effects/Upscaling/Upscaler.cs.meta @@ -0,0 +1,3 @@ +fileFormatVersion: 2 +guid: fd3ec2f422404d659f599fbfaf38dcd7 +timeCreated: 1729512050 \ No newline at end of file diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessLayer.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessLayer.cs index b1d4168..ac16ac7 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessLayer.cs +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessLayer.cs @@ -53,7 +53,7 @@ namespace UnityEngine.Rendering.PostProcessing /// /// FidelityFX Super Resolution 3 (FSR3) Upscaler. /// - SuperResolution, + AdvancedUpscaling, } /// @@ -97,9 +97,9 @@ namespace UnityEngine.Rendering.PostProcessing public TemporalAntialiasing temporalAntialiasing; /// - /// FSR3 upscaling & anti-aliasing settings for this camera. + /// Advanced upscaling & anti-aliasing settings for this camera. /// - public SuperResolution superResolution; + public Upscaling superResolution; /// /// Subpixel Morphological Anti-aliasing settings for this camera. @@ -635,7 +635,7 @@ namespace UnityEngine.Rendering.PostProcessing if (context.IsSuperResolutionActive()) { superResolution.ConfigureCameraViewport(context); - context.SetRenderSize(superResolution.renderSize); + context.SetRenderSize(superResolution.MaxRenderSize); } else { @@ -775,7 +775,7 @@ namespace UnityEngine.Rendering.PostProcessing if (!finalBlitToCameraTarget && m_CurrentContext.IsSuperResolutionActive()) { - var displaySize = superResolution.displaySize; + var displaySize = superResolution.UpscaleSize; m_upscaledOutput = context.GetScreenSpaceTemporaryRT(widthOverride: displaySize.x, heightOverride: displaySize.y); context.destination = m_upscaledOutput; } @@ -1019,7 +1019,7 @@ namespace UnityEngine.Rendering.PostProcessing context.debugLayer = debugLayer; context.antialiasing = antialiasingMode; context.temporalAntialiasing = temporalAntialiasing; - context.superResolution = superResolution; + context.upscaling = superResolution; context.logHistogram = m_LogHistogram; #if UNITY_2018_2_OR_NEWER @@ -1197,15 +1197,15 @@ namespace UnityEngine.Rendering.PostProcessing superResolution.ConfigureJitteredProjectionMatrix(context); // Set the upscaler's output to full display resolution, as well as for all following post-processing effects - context.SetRenderSize(superResolution.displaySize); + context.SetRenderSize(superResolution.UpscaleSize); - var fsrTarget = m_TargetPool.Get(); + var upscaleTarget = m_TargetPool.Get(); var finalDestination = context.destination; - context.GetScreenSpaceTemporaryRT(cmd, fsrTarget, 0, context.sourceFormat, isUpscaleOutput: true); - context.destination = fsrTarget; - superResolution.colorOpaqueOnly = m_opaqueOnly; + context.GetScreenSpaceTemporaryRT(cmd, upscaleTarget, 0, context.sourceFormat, isUpscaleOutput: true); + context.destination = upscaleTarget; + superResolution.ColorOpaqueOnly = m_opaqueOnly; superResolution.Render(context); - context.source = fsrTarget; + context.source = upscaleTarget; context.destination = finalDestination; // Disable dynamic scaling on render targets, so all subsequent effects will be applied on the full resolution upscaled image @@ -1214,7 +1214,7 @@ namespace UnityEngine.Rendering.PostProcessing if (lastTarget > -1) cmd.ReleaseTemporaryRT(lastTarget); - lastTarget = fsrTarget; + lastTarget = upscaleTarget; } bool hasBeforeStackEffects = HasActiveEffects(PostProcessEvent.BeforeStack, context); diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessRenderContext.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessRenderContext.cs index 8a23e55..a0795f2 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessRenderContext.cs +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessRenderContext.cs @@ -218,7 +218,7 @@ namespace UnityEngine.Rendering.PostProcessing /// /// A reference to the FSR3 Upscaler settings for the rendering layer. /// - public SuperResolution superResolution { get; internal set; } + public Upscaling upscaling { get; internal set; } // Internal values used for builtin effects // Beware, these may not have been set before a specific builtin effect has been executed @@ -290,10 +290,10 @@ namespace UnityEngine.Rendering.PostProcessing public bool IsSuperResolutionActive() { - return antialiasing == PostProcessLayer.Antialiasing.SuperResolution + return antialiasing == PostProcessLayer.Antialiasing.AdvancedUpscaling && Application.isPlaying && !isSceneView - && superResolution.IsSupported(); + && upscaling.IsSupported(); } /// diff --git a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessResources.cs b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessResources.cs index e745fa6..5b4df0b 100644 --- a/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessResources.cs +++ b/Packages/com.unity.postprocessing/PostProcessing/Runtime/PostProcessResources.cs @@ -1,4 +1,5 @@ using System; +using FidelityFX.FSR2; using FidelityFX.FSR3; namespace UnityEngine.Rendering.PostProcessing @@ -214,10 +215,15 @@ namespace UnityEngine.Rendering.PostProcessing /// public ComputeShader gaussianDownsample; + /// + /// Compute shaders used by the FidelityFX Super Resolution 2 (FSR2) Upscaler. + /// + public Fsr2Shaders fsr2Upscaler; + /// /// Compute shaders used by the FidelityFX Super Resolution 3 (FSR3) Upscaler. /// - public Fsr3UpscalerShaders superResolution; + public Fsr3UpscalerShaders fsr3Upscaler; /// /// Returns a copy of this class and its content. diff --git a/Packages/com.unity.postprocessing/package.json b/Packages/com.unity.postprocessing/package.json index 2cb7183..2db0fdd 100644 --- a/Packages/com.unity.postprocessing/package.json +++ b/Packages/com.unity.postprocessing/package.json @@ -6,6 +6,6 @@ "description": "The post-processing stack (v2) comes with a collection of effects and image filters you can apply to your cameras to improve the visuals of your games.", "dependencies": { "com.unity.modules.physics": "1.0.0", - "fidelityfx.fsr": "1.0.0" + "fidelityfx.fsr": "1.0.1" } } \ No newline at end of file diff --git a/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Context.cs b/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Context.cs index 38d380b..5b0b238 100644 --- a/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Context.cs +++ b/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Context.cs @@ -212,7 +212,7 @@ namespace FidelityFX.FSR2 // Auto exposure always used to track luma changes in locking logic commandBuffer.SetRenderTarget(_resources.AutoExposure); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1f, 0f, 0f)); // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); diff --git a/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Pass.cs b/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Pass.cs index bda5092..94ef9e5 100644 --- a/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Pass.cs +++ b/Packages/fidelityfx.fsr/Runtime/FSR2/Fsr2Pass.cs @@ -19,6 +19,7 @@ // THE SOFTWARE. using System; +using System.Diagnostics; using System.Runtime.InteropServices; using UnityEngine; using UnityEngine.Profiling; @@ -42,7 +43,7 @@ namespace FidelityFX.FSR2 protected ComputeShader ComputeShader; protected int KernelIndex; - protected CustomSampler Sampler; + private CustomSampler _sampler; protected Fsr2Pass(Fsr2.ContextDescription contextDescription, Fsr2Resources resources, ComputeBuffer constants) { @@ -57,9 +58,9 @@ namespace FidelityFX.FSR2 public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + BeginSample(commandBuffer); DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr2.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); @@ -78,7 +79,7 @@ namespace FidelityFX.FSR2 ComputeShader = shader; KernelIndex = ComputeShader.FindKernel("CS"); - Sampler = CustomSampler.Create(passName); + _sampler = CustomSampler.Create(passName); bool useLut = false; #if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ @@ -96,6 +97,18 @@ namespace FidelityFX.FSR2 if (useLut) ComputeShader.EnableKeyword("FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE"); if ((flags & Fsr2.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFX_HALF"); } + + [Conditional("ENABLE_PROFILER")] + protected void BeginSample(CommandBuffer cmd) + { + cmd.BeginSample(_sampler); + } + + [Conditional("ENABLE_PROFILER")] + protected void EndSample(CommandBuffer cmd) + { + cmd.EndSample(_sampler); + } } internal class Fsr2ComputeLuminancePyramidPass : Fsr2Pass @@ -320,7 +333,7 @@ namespace FidelityFX.FSR2 public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr2.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + BeginSample(commandBuffer); ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; ref var color = ref dispatchParams.ColorPreUpscale; @@ -334,7 +347,7 @@ namespace FidelityFX.FSR2 commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } } diff --git a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3Upscaler.cs b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3Upscaler.cs index d69aa29..6b3ad77 100644 --- a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3Upscaler.cs +++ b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3Upscaler.cs @@ -200,6 +200,7 @@ namespace FidelityFX.FSR3 public float CameraFar; public float CameraFovAngleVertical; public float ViewSpaceToMetersFactor; + public float VelocityFactor = 1.0f; public DispatchFlags Flags; public bool UseTextureArrays; // Enable texture array bindings, primarily used for HDRP and XR @@ -265,6 +266,8 @@ namespace FidelityFX.FSR3 public float deltaPreExposure; public float viewSpaceToMetersFactor; public float frameIndex; + + public float velocityFactor; } [Serializable, StructLayout(LayoutKind.Sequential)] diff --git a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerContext.cs b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerContext.cs index 2b7f2ef..07f1af7 100644 --- a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerContext.cs +++ b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerContext.cs @@ -96,6 +96,7 @@ namespace FidelityFX.FSR3 _resourceFrameIndex = 0; UpscalerConsts.maxUpscaleSize = _contextDescription.MaxUpscaleSize; + UpscalerConsts.velocityFactor = 1.0f; _resources.Create(_contextDescription); CreatePasses(); @@ -232,7 +233,7 @@ namespace FidelityFX.FSR3 // Auto exposure always used to track luma changes in locking logic commandBuffer.SetRenderTarget(_resources.FrameInfo); - commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f)); + commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1f, 0f, 0f)); // Reset atomic counter to 0 commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter); @@ -407,6 +408,8 @@ namespace FidelityFX.FSR3 constants.frameIndex = 0; else constants.frameIndex += 1.0f; + + constants.velocityFactor = dispatchParams.VelocityFactor; } private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Fsr3Upscaler.DispatchDescription dispatchParams) diff --git a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerPass.cs b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerPass.cs index 3b76eb4..f60e1f6 100644 --- a/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerPass.cs +++ b/Packages/fidelityfx.fsr/Runtime/FSR3/Fsr3UpscalerPass.cs @@ -19,6 +19,7 @@ // THE SOFTWARE. using System; +using System.Diagnostics; using System.Runtime.InteropServices; using UnityEngine; using UnityEngine.Profiling; @@ -40,7 +41,7 @@ namespace FidelityFX.FSR3 protected ComputeShader ComputeShader; protected int KernelIndex; - protected CustomSampler Sampler; + private CustomSampler _sampler; protected Fsr3UpscalerPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants) { @@ -55,9 +56,9 @@ namespace FidelityFX.FSR3 public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + BeginSample(commandBuffer); DoScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchX, dispatchY); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } protected abstract void DoScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY); @@ -76,7 +77,7 @@ namespace FidelityFX.FSR3 ComputeShader = shader; KernelIndex = ComputeShader.FindKernel("CS"); - Sampler = CustomSampler.Create(passName); + _sampler = CustomSampler.Create(passName); bool useLut = false; #if UNITY_2022_1_OR_NEWER // This will also work in 2020.3.43+ and 2021.3.14+ @@ -94,6 +95,18 @@ namespace FidelityFX.FSR3 if (useLut) ComputeShader.EnableKeyword("FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE"); if ((flags & Fsr3Upscaler.InitializationFlags.EnableFP16Usage) != 0) ComputeShader.EnableKeyword("FFX_HALF"); } + + [Conditional("ENABLE_PROFILER")] + protected void BeginSample(CommandBuffer cmd) + { + cmd.BeginSample(_sampler); + } + + [Conditional("ENABLE_PROFILER")] + protected void EndSample(CommandBuffer cmd) + { + cmd.EndSample(_sampler); + } } internal class Fsr3UpscalerPrepareInputsPass : Fsr3UpscalerPass @@ -384,7 +397,7 @@ namespace FidelityFX.FSR3 public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY) { - commandBuffer.BeginSample(Sampler); + BeginSample(commandBuffer); ref var opaqueOnly = ref dispatchParams.ColorOpaqueOnly; ref var color = ref dispatchParams.ColorPreUpscale; @@ -398,7 +411,7 @@ namespace FidelityFX.FSR3 commandBuffer.DispatchCompute(ComputeShader, KernelIndex, dispatchX, dispatchY, 1); - commandBuffer.EndSample(Sampler); + EndSample(commandBuffer); } } diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_accumulate_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_accumulate_pass.compute index 63532d7..63a4387 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_accumulate_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_accumulate_pass.compute @@ -25,7 +25,6 @@ #pragma multi_compile_local __ FFX_FSR2_OPTION_HDR_COLOR_INPUT #pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile_local __ FFX_FSR2_OPTION_APPLY_SHARPENING #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_autogen_reactive_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_autogen_reactive_pass.compute index 6bc2301..063e5d7 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_autogen_reactive_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_autogen_reactive_pass.compute @@ -21,9 +21,6 @@ #pragma kernel CS #pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute index 7058cc5..c63dfe6 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_compute_luminance_pyramid_pass.compute @@ -21,9 +21,6 @@ #pragma kernel CS #pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_lock_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_lock_pass.compute index fb12d2c..2e86474 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_lock_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_lock_pass.compute @@ -21,8 +21,6 @@ #pragma kernel CS #pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_rcas_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_rcas_pass.compute index e6ac7df..39c7ef3 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_rcas_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_rcas_pass.compute @@ -20,10 +20,6 @@ #pragma kernel CS -#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH - #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY #include "ffx_fsr_unity_common.cginc" diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_tcr_autogen_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_tcr_autogen_pass.compute index 5d2668a..bb1b780 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_tcr_autogen_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr2_tcr_autogen_pass.compute @@ -21,9 +21,7 @@ #pragma kernel CS #pragma multi_compile_local __ FFX_HALF -#pragma multi_compile_local __ FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS #pragma multi_compile_local __ FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -#pragma multi_compile_local __ FFX_FSR2_OPTION_INVERTED_DEPTH #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr3upscaler_accumulate_pass.compute b/Packages/fidelityfx.fsr/Shaders/ffx_fsr3upscaler_accumulate_pass.compute index a8d6864..bc0b1b7 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr3upscaler_accumulate_pass.compute +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr3upscaler_accumulate_pass.compute @@ -24,6 +24,7 @@ #pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE #pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT #pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS #pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING #pragma multi_compile __ UNITY_FSR_TEXTURE2D_X_ARRAY diff --git a/Packages/fidelityfx.fsr/Shaders/ffx_fsr_unity_common.cginc b/Packages/fidelityfx.fsr/Shaders/ffx_fsr_unity_common.cginc index b1bcb5c..364ae32 100644 --- a/Packages/fidelityfx.fsr/Shaders/ffx_fsr_unity_common.cginc +++ b/Packages/fidelityfx.fsr/Shaders/ffx_fsr_unity_common.cginc @@ -38,6 +38,11 @@ //#pragma require Native16Bit //#endif +// Allow use of Xbox Series-specific optimizations +// #if defined(SHADER_API_GAMECORE_XBOXSERIES) +// #define __XBOX_SCARLETT +// #endif + // Hack to work around the lack of texture atomics on Metal #if defined(SHADER_API_METAL) #define InterlockedAdd(dest, val, orig) { (orig) = (dest); (dest) += (val); } diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl b/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl index 37fa1b7..0d6b67d 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl +++ b/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_luma_pyramid_pass.hlsl @@ -30,8 +30,12 @@ #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 4 #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 5 #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 6 +#ifdef SHADER_API_GLCORE +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1 7 +#else #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 7 #define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1 8 +#endif #define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl b/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl index 5403792..fe85c3e 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl +++ b/Packages/fidelityfx.fsr/Shaders/shaders/ffx_fsr3upscaler_shading_change_pyramid_pass.hlsl @@ -32,7 +32,9 @@ #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 3 #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 4 #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 5 +#ifndef SHADER_API_GLCORE #define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 6 +#endif #define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 0 diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h index c52cc1a..9007343 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_callbacks_hlsl.h @@ -558,6 +558,14 @@ FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) { return r_prepared_input_color[iPxPos].xyz; } + +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F3 LoadPreparedInputColorHalf(FfxUInt32x2 iPxPos) +{ + return FFX_MIN16_F3(r_prepared_input_color[iPxPos].xyz); +} +#endif + #endif #if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) @@ -801,6 +809,27 @@ FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) #endif } +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F SampleLanczos2Weight_NoValu(FFX_MIN16_F x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, __XB_AsHalf(__XB_V_PACK_B32_F16(x, 0.5)), 0)); +#else + return 0.0; +#endif +} + +FFX_MIN16_F SampleLanczos2Weight_NoValuNoA16(FfxFloat32 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x, 0.5), 0)); +#else + return 0.0; +#endif +} +#endif + #if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_common.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_common.h index e46b66c..d20431b 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_common.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_common.h @@ -54,7 +54,7 @@ FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLan FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale; // Auto exposure -FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e4f; struct AccumulationPassCommonParams { @@ -509,6 +509,32 @@ FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) return fRgb; } +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +void PrepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + FFX_MIN16_F ExposureOverPreExposureOver = FFX_MIN16_F(fExposure / fPreExposure); + + r *= ExposureOverPreExposureOver; + g *= ExposureOverPreExposureOver; + b *= ExposureOverPreExposureOver; + + r = ffxClampHalf(r, 0.0, FSR2_FP16_MAX); + g = ffxClampHalf(g, 0.0, FSR2_FP16_MAX); + b = ffxClampHalf(b, 0.0, FSR2_FP16_MAX); +} + +void UnprepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure) +{ + FFX_MIN16_F PreExposureOverExposure = FFX_MIN16_F(PreExposure() / fExposure); + + r *= PreExposureOverExposure; + g *= PreExposureOverExposure; + b *= PreExposureOverExposure; +} + +#endif + struct BilinearSamplingData { diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h index d8d4820..a0e74b0 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid.h @@ -83,6 +83,7 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 { FfxFloat32 rate = 1.0f; result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + result = ffxMax(0.0f, result); } FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); SPD_SetExposureBuffer(spdOutput); diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_rcas.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_rcas.h index fd5fd26..1a8c756 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_rcas.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_rcas.h @@ -25,38 +25,80 @@ #include "../ffx_core.h" +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + #define FSR_RCAS_PREFER_PAIRED_VERSION 1 +#else + #define FSR_RCAS_PREFER_PAIRED_VERSION 0 +#endif + void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) { StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); } -#define FSR_RCAS_F 1 -FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) -{ - FfxFloat32x4 fColor = LoadRCAS_Input(p); +#if FSR_RCAS_PREFER_PAIRED_VERSION + #define FSR_RCAS_HX2 1 - fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) + { + return FfxFloat16x4(LoadRCAS_Input(p)); + } + void FsrRcasInputHx2(inout FfxFloat16x2 r, inout FfxFloat16x2 g, inout FfxFloat16x2 b) + { + PrepareRgbPaired(r, g, b, Exposure(), PreExposure()); + } - return fColor; -} -void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + #include "../fsr1/ffx_fsr1.h" -#include "../fsr1/ffx_fsr1.h" + void CurrFilterPaired(FFX_MIN16_U2 pos) + { + FfxFloat16x2 cr; + FfxFloat16x2 cg; + FfxFloat16x2 cb; + FsrRcasHx2(cr, cg, cb, pos, RCASConfig()); -void CurrFilter(FFX_MIN16_U2 pos) -{ - FfxFloat32x3 c; - FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + UnprepareRgbPaired(cr, cg, cb, Exposure()); - c = UnprepareRgb(c, Exposure()); + WriteUpscaledOutput(pos, FfxFloat16x3(cr.x, cg.x, cb.x)); //TODO: fix type + pos.x += 8; + WriteUpscaledOutput(pos, FfxFloat16x3(cr.y, cg.y, cb.y)); //TODO: fix type + } - WriteUpscaledOutput(pos, c); -} +#else + #define FSR_RCAS_F 1 + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) + { + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + + return fColor; + } + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + + #include "../fsr1/ffx_fsr1.h" + + void CurrFilter(FFX_MIN16_U2 pos) + { + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c = UnprepareRgb(c, Exposure()); + + WriteUpscaledOutput(pos, c); + } + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) { // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); +#if FSR_RCAS_PREFER_PAIRED_VERSION + CurrFilterPaired(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilterPaired(FFX_MIN16_U2(gxy)); +#else CurrFilter(FFX_MIN16_U2(gxy)); gxy.x += 8u; CurrFilter(FFX_MIN16_U2(gxy)); @@ -64,4 +106,5 @@ void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) CurrFilter(FFX_MIN16_U2(gxy)); gxy.x -= 8u; CurrFilter(FFX_MIN16_U2(gxy)); +#endif } diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_sample.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_sample.h index b75f090..cd7142a 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_sample.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_sample.h @@ -139,14 +139,6 @@ FfxFloat32 Lanczos2(FfxFloat32 x) #if FFX_HALF -#if 0 -FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) -{ - const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants - return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); -} -#endif - FFX_MIN16_F Lanczos2(FFX_MIN16_F x) { x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); @@ -169,6 +161,26 @@ FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F2 PairedLanczos2ApproxSqNoClamp(FFX_MIN16_F2 x2) +{ + // Xbox ATG (Pavel): + // + // 2.0 * x2 - 5.0 25.0 25.0 - 16.0 (2.0 * x2 - 5.0)^2 - (3.0)^2 (2.0 * x2 - 8.0) * (2.0 * x2 - 2.0) (x2 - 4.0) * (x2 - 1.0) + // a = -------------- ==> ---- * a^2 - -------------- = ----------------------------- = ---------------------------------- = ----------------------- = b * (x2 - 1.0) + // 5.0 16.0 16.0 16.0 16.0 4.0 + // + // so we need to compute just (b * b) * (b * x2 - b), so we should get four packed instructions: 2 fma + 2 mul + // + + FFX_MIN16_F2 b = (0.25 * x2 - 1.0); + return (b * b) * (b * x2 - b); +} + +#endif + #endif //FFX_HALF FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) @@ -183,6 +195,15 @@ FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); return Lanczos2ApproxSqNoClamp(x2); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSq(FFX_MIN16_F2 x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F2(4.0, 4.0)); + return PairedLanczos2ApproxSqNoClamp(x2); +} +#endif + #endif //FFX_HALF FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) @@ -219,6 +240,21 @@ FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) { return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F Lanczos2_UseLUTNoAbs(FFX_MIN16_F x) +{ + return SampleLanczos2Weight_NoValu(x); +} + +FFX_MIN16_F Lanczos2_UseLUTNoAbsNoA16(FfxFloat32 x) +{ + return SampleLanczos2Weight_NoValuNoA16(x); +} + +#endif + #endif //FFX_HALF FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) @@ -364,6 +400,19 @@ FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) } #if FFX_HALF + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F4 Lanczos2ApplyWeightX(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted) +{ + return (((fWeight0.x * fColor0) + fWeight1.x * fColor1) + ((fWeight2.x * fColor2) + fWeight3.x * fColor3)) * fWeightSumInverted.x; +} + +FFX_MIN16_F4 Lanczos2ApplyWeightY(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted) +{ + return (((fWeight0.y * fColor0) + fWeight1.y * fColor1) + ((fWeight2.y * fColor2) + fWeight3.y * fColor3)) * fWeightSumInverted.y; +} +#endif + FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_upsample.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_upsample.h index 9287185..2281d98 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_upsample.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr2/ffx_fsr2_upsample.h @@ -83,22 +83,167 @@ FfxFloat32 ComputeMaxKernelWeight() { return ffxMin(FfxFloat32(1.99f), fKernelWeight); } + +#if FFX_HALF && (FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1 +#else +#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0 +#endif + +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + +FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y) +{ + uint lo = x ? 0x00003c00 : 0x00000000; + uint hi = y ? 0x3c000000 : 0x00000000; + + return FFX_MIN16_F2(__XB_AsHalf(lo).x, __XB_AsHalf(hi).y); +} + +struct PairedRectificationBoxAndAccumulatedColorAndWeight +{ + FFX_MIN16_F2 aabbMinRG; + FFX_MIN16_F2 aabbMinB; + + FFX_MIN16_F2 aabbMaxRG; + FFX_MIN16_F2 aabbMaxB; + + FFX_MIN16_F2 boxCenterRG; + FFX_MIN16_F2 boxCenterB; + + FFX_MIN16_F2 boxVecRG; + FFX_MIN16_F2 boxVecB; + + FFX_MIN16_F2 fBoxCenterWeight; + + FFX_MIN16_F2 fColorRG; + FFX_MIN16_F2 fColorB; + FFX_MIN16_F2 fWeight; + + FFX_MIN16_F fKernelBiasSq; + FfxFloat32 fRectificationCurveBias; + + void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias) + { + fKernelBiasSq = FFX_MIN16_F(kernelBias * kernelBias); + fRectificationCurveBias = rectificationCurveBias; + } + + void init(FFX_MIN16_F fSrcSampleOffsetSq, bool sampleOnScreenX, bool sampleOnScreenY, FFX_MIN16_F3 colorSample) + { + // NOTE: make sure exp has 32-bit precision + const FFX_MIN16_F fBoxSampleWeight = FFX_MIN16_F( + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq)) + ); + +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); +#else +#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" +#endif + const FFX_MIN16_F2 fSampleWeight = FFX_MIN16_F2((sampleOnScreenX && sampleOnScreenY ? 1.0 : 0.0), 0.0) * LanczosUpsampleWeight; + + aabbMinRG = colorSample.rg; + aabbMinB = colorSample.bb; + + aabbMaxRG = colorSample.rg; + aabbMaxB = colorSample.bb; + + boxCenterRG = colorSample.rg * fBoxSampleWeight.x; + boxCenterB = colorSample.bb * fBoxSampleWeight; + + boxVecRG = colorSample.rg * boxCenterRG; + boxVecB = colorSample.bb * boxCenterB; + + fBoxCenterWeight = fBoxSampleWeight; + + fColorRG = colorSample.rg * fSampleWeight.x; + fColorB = colorSample.bb * fSampleWeight; + fWeight = fSampleWeight; + } + + void addSample(FFX_MIN16_F2 fSrcSampleOffsetSq, bool sample0OnScreen, bool sample1OnScreen, bool sample01OnScreen, FFX_MIN16_F3 ColorSample0, FFX_MIN16_F3 ColorSample1) + { + // NOTE: make sure exp has 32-bit precision + const FFX_MIN16_F2 fBoxSampleWeight = FFX_MIN16_F2( + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.x)), + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.y)) + ); + +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); +#else +#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" +#endif + const FFX_MIN16_F2 fSampleWeight = Bool2ToFloat16x2(sample0OnScreen && sample01OnScreen, sample1OnScreen && sample01OnScreen) * LanczosUpsampleWeight; + + FFX_MIN16_F2 colorSampleB = FFX_MIN16_F2(ColorSample0.b, ColorSample1.b); + + aabbMinRG = ffxMin(aabbMinRG, ColorSample0.rg); + aabbMinRG = ffxMin(aabbMinRG, ColorSample1.rg); + aabbMinB = ffxMin(aabbMinB, colorSampleB); + + aabbMaxRG = ffxMax(aabbMaxRG, ColorSample0.rg); + aabbMaxRG = ffxMax(aabbMaxRG, ColorSample1.rg); + aabbMaxB = ffxMax(aabbMaxB, colorSampleB); + + FFX_MIN16_F2 weightedColorSampleRG0 = ColorSample0.rg * fBoxSampleWeight.x; + FFX_MIN16_F2 weightedColorSampleRG1 = ColorSample1.rg * fBoxSampleWeight.y; + FFX_MIN16_F2 weightedColorSampleB = colorSampleB * fBoxSampleWeight; + + boxCenterRG += weightedColorSampleRG0; + boxCenterRG += weightedColorSampleRG1; + boxCenterB += weightedColorSampleB; + + boxVecRG += ColorSample0.rg * weightedColorSampleRG0; + boxVecRG += ColorSample1.rg * weightedColorSampleRG1; + boxVecB += colorSampleB * weightedColorSampleB; + + fBoxCenterWeight += fBoxSampleWeight; + + fWeight += fSampleWeight; + fColorRG += (ColorSample0.rg * fSampleWeight.x) + (ColorSample1.rg * fSampleWeight.y); + fColorB += colorSampleB * fSampleWeight; + } + + void finalize(FFX_PARAMETER_INOUT RectificationBox rectificationBox, FFX_PARAMETER_INOUT FfxFloat32x4 outColorAndWeight) + { + rectificationBox.aabbMin.r = FfxFloat32(aabbMinRG.x); + rectificationBox.aabbMin.g = FfxFloat32(aabbMinRG.y); + rectificationBox.aabbMin.b = FfxFloat32(ffxMin(aabbMinB.x, aabbMinB.y)); + + rectificationBox.aabbMax.r = FfxFloat32(aabbMaxRG.x); + rectificationBox.aabbMax.g = FfxFloat32(aabbMaxRG.y); + rectificationBox.aabbMax.b = FfxFloat32(ffxMax(aabbMaxB.x, aabbMaxB.y)); + + rectificationBox.boxCenter.r = FfxFloat32(boxCenterRG.x); + rectificationBox.boxCenter.g = FfxFloat32(boxCenterRG.y); + rectificationBox.boxCenter.b = FfxFloat32(boxCenterB.x + boxCenterB.y); + + rectificationBox.boxVec.r = FfxFloat32(boxVecRG.x); + rectificationBox.boxVec.g = FfxFloat32(boxVecRG.y); + rectificationBox.boxVec.b = FfxFloat32(boxVecB.x + boxVecB.y); + + rectificationBox.fBoxCenterWeight = FfxFloat32(fBoxCenterWeight.x + fBoxCenterWeight.y); + + outColorAndWeight = FfxFloat32x4(fColorRG, fColorB.x + fColorB.y, fWeight.x + fWeight.y); + } +}; +#endif + FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) { - #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr2_force16_begin.h" - #endif // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... - #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr2_force16_end.h" - #endif - +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + FFX_MIN16_F3 fSamples[iLanczos2SampleCount]; +#else FfxFloat32x3 fSamples[iLanczos2SampleCount]; +#endif FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 @@ -113,6 +258,59 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf(bFlipCol ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf(bFlipRow ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + + typedef FfxInt32 FfxTexCoordI; + typedef FfxInt32x2 FfxTexCoordI2; + + const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2(RenderSize()) - FfxTexCoordI2(1, 1); + + const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX01.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX01.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX23.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX23.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY01.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY01.y, 0, renderSizeLastTexelCoord.y) + ); + + const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY23.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY23.y, 0, renderSizeLastTexelCoord.y) + ); + + fSamples[ 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.x)); + fSamples[ 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.x)); + fSamples[ 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.x)); + + fSamples[4 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.y)); + fSamples[4 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y)); + fSamples[4 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.y)); + + fSamples[8 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.x)); + fSamples[8 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.x)); + fSamples[8 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.x)); + + fSamples[12 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.y)); + fSamples[12 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.y)); + fSamples[12 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.y)); + +#else FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); FFX_UNROLL @@ -130,6 +328,7 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); } } +#endif FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); @@ -145,6 +344,75 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const bool coordX0OnScreen = iSrcSamplePosX01.x == iSrcSamplePosX01Clamped.x; + const bool coordX1OnScreen = iSrcSamplePosX01.y == iSrcSamplePosX01Clamped.y; + const bool coordX2OnScreen = iSrcSamplePosX23.x == iSrcSamplePosX23Clamped.x; + + const bool coordY0OnScreen = iSrcSamplePosY01.x == iSrcSamplePosY01Clamped.x; + const bool coordY1OnScreen = iSrcSamplePosY01.y == iSrcSamplePosY01Clamped.y; + const bool coordY2OnScreen = iSrcSamplePosY23.x == iSrcSamplePosY23Clamped.x; + + const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2(fBaseSampleOffset); + + const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf.xx + fSampleOffsetX02; + const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf.yy + fSampleOffsetY02; + + const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02; + const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02; + const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf; + + PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox; + pairedBox.setKernelBiasAndRectificationCurveBias(fKernelBias, fRectificationCurveBias); + + // init by o o o + // o x o + // o o o + pairedBox.init( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + coordX1OnScreen, coordY1OnScreen, + fSamples[5] + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + coordX0OnScreen, coordX2OnScreen, coordY0OnScreen, + fSamples[0 + 0], fSamples[0 + 2] + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, + coordX0OnScreen, coordX2OnScreen, coordY2OnScreen, + fSamples[8 + 0], fSamples[8 + 2] + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + coordX0OnScreen, coordX2OnScreen, coordY1OnScreen, + fSamples[4 + 0], fSamples[4 + 2] + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + coordY0OnScreen, coordY2OnScreen, coordX1OnScreen, + fSamples[0 + 1], fSamples[8 + 1] + ); + + pairedBox.finalize(clippingBox, fColorAndWeight); +#else FFX_UNROLL for (FfxInt32 row = 0; row < 3; row++) { FFX_UNROLL @@ -172,6 +440,7 @@ FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams p } } } +#endif RectificationBoxComputeVarianceBoxData(clippingBox); diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h index 766cba3..084d4d3 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h @@ -52,21 +52,20 @@ void RectifyHistory( const FfxFloat32 fReactiveFactor = ffxPow(params.fReactiveMask, 1.0f / 2.0f); const FfxFloat32 fShadingChangeFactor = params.fShadingChange; const FfxFloat32 fBoxScaleT = ffxMax(fVecolityFactor, ffxMax(fDistanceFactor, ffxMax(fAccumulationFactor, ffxMax(fReactiveFactor, fShadingChangeFactor)))); + + const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); + const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * FfxFloat32x3(1.7f, 1.0f, 1.0f) * fBoxScale; - const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); - - const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * fBoxScale; - const FfxFloat32x3 fBoxMin = data.clippingBox.boxCenter - fScaledBoxVec; - const FfxFloat32x3 fBoxMax = data.clippingBox.boxCenter + fScaledBoxVec; - - if (any(FFX_GREATER_THAN(fBoxMin, data.fHistoryColor)) || any(FFX_GREATER_THAN(data.fHistoryColor, fBoxMax))) { + const FfxFloat32x3 fClampedScaledBoxVec = ffxMax(fScaledBoxVec, FfxFloat32x3(1.193e-7f, 1.193e-7f, 1.193e-7f)); + const FfxFloat32x3 fTransformedHistoryColor = (data.fHistoryColor - data.clippingBox.boxCenter) / fClampedScaledBoxVec; - const FfxFloat32x3 fClampedHistoryColor = clamp(data.fHistoryColor, fBoxMin, fBoxMax); - - const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion); + if (length(fTransformedHistoryColor)>1.f) { + const FfxFloat32x3 fClampedHistoryColor = normalize(fTransformedHistoryColor); + const FfxFloat32x3 fFinalClampedHistoryColor = (fClampedHistoryColor * fScaledBoxVec) + data.clippingBox.boxCenter; // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection - data.fHistoryColor = ffxLerp(fClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution)); + const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion); + data.fHistoryColor = ffxLerp(fFinalClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution)); } } @@ -97,7 +96,7 @@ void ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FF { FfxFloat32 fBaseAccumulation = params.fAccumulation; - fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, params.f4KVelocity / 0.5f)))); + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, (params.f4KVelocity * VelocityFactor()) / 0.5f)))); data.fHistoryWeight = fBaseAccumulation; } @@ -162,6 +161,8 @@ void Accumulate(FfxInt32x2 iPxHrPos) data.fHistoryColor /= Exposure(); + data.fHistoryColor = ffxMax(data.fHistoryColor, FfxFloat32x3(0.0f, 0.0f, 0.0f)); + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(data.fHistoryColor, data.fLock)); // Output final color when RCAS is disabled diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h index 1c3fc99..2b2811e 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h @@ -75,6 +75,8 @@ cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UP FfxFloat32 fDeltaPreExposure; FfxFloat32 fViewSpaceToMetersFactor; FfxFloat32 fFrameIndex; + + FfxFloat32 fVelocityFactor; }; #define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size. @@ -170,6 +172,11 @@ FfxFloat32 FrameIndex() return fFrameIndex; } +FfxFloat32 VelocityFactor() +{ + return fVelocityFactor; +} + #endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) #define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p) @@ -788,9 +795,15 @@ FfxFloat32 Exposure() { FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; +#if defined(__XBOX_SCARLETT) + if (exposure < 0.000030517578/** 2^-15 */) { + exposure = 1.0f; + } +#else if (exposure == 0.0f) { exposure = 1.0f; } +#endif // #if defined(__XBOX_SCARLETT) return exposure; } @@ -916,14 +929,18 @@ FfxFloat32x4 FrameInfo() defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ - defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + (defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) || defined(SHADER_API_GLCORE)) RWTexture2D rw_spd_mip0 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0); RWTexture2D rw_spd_mip1 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1); RWTexture2D rw_spd_mip2 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2); RWTexture2D rw_spd_mip3 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3); +#ifdef SHADER_API_GLCORE +globallycoherent RWTexture2D rw_spd_mip4 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4); +#else RWTexture2D rw_spd_mip4 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4); globallycoherent RWTexture2D rw_spd_mip5 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5); +#endif FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) { @@ -937,7 +954,9 @@ FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN LOAD(2); LOAD(3); LOAD(4); +#ifndef SHADER_API_GLCORE LOAD(5); +#endif return 0; @@ -957,7 +976,9 @@ void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat3 STORE(2); STORE(3); STORE(4); +#ifndef SHADER_API_GLCORE STORE(5); +#endif #undef STORE } diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h index dd479b1..87aa596 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h @@ -99,7 +99,7 @@ FfxFloat32 SceneAverageLuma() #endif // Auto exposure -FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e4f; struct AccumulationPassCommonParams { diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h index e8a8c49..c545579 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h @@ -47,6 +47,12 @@ FFX_STATIC const FfxInt32 LOG_LUMA = 0; FFX_STATIC const FfxInt32 LUMA = 1; FFX_STATIC const FfxInt32 DEPTH_IN_METERS = 2; +#ifdef SHADER_API_GLCORE +FFX_STATIC const FfxInt32 MAX_MIP = 4; +#else +FFX_STATIC const FfxInt32 MAX_MIP = 5; +#endif + FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) { //We assume linear data. if non-linear input (sRGB, ...), @@ -67,7 +73,7 @@ FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) { - return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); + return FfxFloat32x4(RWLoadPyramid(tex, MAX_MIP), 0, 0); } FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) @@ -77,7 +83,7 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) { - if (index == 5) + if (index == MAX_MIP) { StorePyramid(pix, outValue.xy, index); } diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h index 90a85b3..81f8ed2 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h @@ -25,38 +25,82 @@ #include "../ffx_core.h" +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + #define FSR_RCAS_PREFER_PAIRED_VERSION 1 +#else + #define FSR_RCAS_PREFER_PAIRED_VERSION 0 +#endif + void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) { StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); } -#define FSR_RCAS_F 1 -FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) -{ - FfxFloat32x4 fColor = LoadRCAS_Input(p); +#if FSR_RCAS_PREFER_PAIRED_VERSION + #define FSR_RCAS_HX2 1 + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) + { + return FfxFloat16x4(LoadRCAS_Input(p)); + } + void FsrRcasInputHx2(inout FfxFloat16x2 r, inout FfxFloat16x2 g, inout FfxFloat16x2 b) + { + FfxFloat32 e = Exposure(); + r = FfxFloat16x2(r * e); + g = FfxFloat16x2(g * e); + b = FfxFloat16x2(b * e); + } - fColor.rgb *= Exposure(); + #include "../fsr1/ffx_fsr1.h" + + void CurrFilterPaired(FFX_MIN16_U2 pos) + { + FfxFloat16x2 cr; + FfxFloat16x2 cg; + FfxFloat16x2 cb; + FsrRcasHx2(cr, cg, cb, pos, RCASConfig()); + FfxFloat32 InvExposure = 1.0f / Exposure(); + cr = FfxFloat16x2(cr * InvExposure); + cg = FfxFloat16x2(cg * InvExposure); + cb = FfxFloat16x2(cb * InvExposure); + WriteUpscaledOutput(pos, FfxFloat16x3(cr.x, cg.x, cb.x)); //TODO: fix type + pos.x += 8; + WriteUpscaledOutput(pos, FfxFloat16x3(cr.y, cg.y, cb.y)); //TODO: fix type + } +#else + #define FSR_RCAS_F 1 + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) + { + FfxFloat32x4 fColor = LoadRCAS_Input(p); - return fColor; -} -void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + fColor.rgb *= Exposure(); -#include "../fsr1/ffx_fsr1.h" + return fColor; + } + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} -void CurrFilter(FFX_MIN16_U2 pos) -{ - FfxFloat32x3 c; - FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + #include "../fsr1/ffx_fsr1.h" - c /= Exposure(); + void CurrFilter(FFX_MIN16_U2 pos) + { + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); - WriteUpscaledOutput(pos, c); -} + c /= Exposure(); + + WriteUpscaledOutput(pos, c); + } + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) { // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); +#if FSR_RCAS_PREFER_PAIRED_VERSION + CurrFilterPaired(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilterPaired(FFX_MIN16_U2(gxy)); +#else CurrFilter(FFX_MIN16_U2(gxy)); gxy.x += 8u; CurrFilter(FFX_MIN16_U2(gxy)); @@ -64,4 +108,5 @@ void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) CurrFilter(FFX_MIN16_U2(gxy)); gxy.x -= 8u; CurrFilter(FFX_MIN16_U2(gxy)); +#endif } diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h index 153a9b7..bc0e9d4 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h @@ -32,6 +32,16 @@ FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) +#if FFX_HALF +FFX_MIN16_F4 WrapHistory16(FfxInt32x2 iPxSample) +{ + return FFX_MIN16_F4(LoadHistory(iPxSample)); +} + +DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples16, WrapHistory16) +DeclareCustomTextureSampleMin16(HistorySample16, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples16) +#endif + FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) { #if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS @@ -51,8 +61,13 @@ void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAME } void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) + { +#if FFX_HALF && FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF + const FfxFloat32x4 fReprojectedHistory = FfxFloat32x4(HistorySample16(params.fReprojectedHrUv, UpscaleSize())); +#else const FfxFloat32x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, PreviousFrameUpscaleSize()); +#endif data.fHistoryColor = fReprojectedHistory.rgb; data.fHistoryColor *= DeltaPreExposure(); diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h index 5f727b1..7a723d5 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h @@ -169,6 +169,24 @@ FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSqNoClamp(FFX_MIN16_F2 x2) +{ + // Xbox ATG (Pavel): + // + // 2.0 * x2 - 5.0 25.0 25.0 - 16.0 (2.0 * x2 - 5.0)^2 - (3.0)^2 (2.0 * x2 - 8.0) * (2.0 * x2 - 2.0) (x2 - 4.0) * (x2 - 1.0) + // a = -------------- ==> ---- * a^2 - -------------- = ----------------------------- = ---------------------------------- = ----------------------- = b * (x2 - 1.0) + // 5.0 16.0 16.0 16.0 16.0 4.0 + // + // so we need to compute just (b * b) * (b * x2 - b), so we should get four packed instructions: 2 fma + 2 mul + // + + FFX_MIN16_F2 b = (0.25 * x2 - 1.0); + return (b * b) * (b * x2 - b); +} +#endif + #endif //FFX_HALF FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) @@ -183,6 +201,14 @@ FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); return Lanczos2ApproxSqNoClamp(x2); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSq(FFX_MIN16_F2 x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F2(4.0, 4.0)); + return PairedLanczos2ApproxSqNoClamp(x2); +} +#endif #endif //FFX_HALF FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h index 651c5b3..f09e991 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h @@ -47,6 +47,12 @@ FFX_STATIC const FfxInt32 DIFFERENCE = 0; FFX_STATIC const FfxInt32 SIGN_SUM = 1; FFX_STATIC const FfxInt32 MIP0_INDICATOR = 2; +#ifdef SHADER_API_GLCORE +FFX_STATIC const FfxInt32 MAX_MIP = 4; +#else +FFX_STATIC const FfxInt32 MAX_MIP = 5; +#endif + FfxFloat32x2 Sort2(FfxFloat32x2 v) { return FfxFloat32x2(ffxMin(v.x, v.y), ffxMax(v.x, v.y)); @@ -200,7 +206,7 @@ FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) { - return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); + return FfxFloat32x4(RWLoadPyramid(tex, MAX_MIP), 0, 0); } FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) diff --git a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h index 2d587f0..801a0a9 100644 --- a/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h +++ b/Packages/fidelityfx.fsr/Shaders/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h @@ -44,6 +44,26 @@ FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fK return fSampleWeight; } +#if FFX_HALF +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) +{ + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + + // To Test: Save reciproqual sqrt compute + // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} +#endif + FfxFloat32 ComputeMaxKernelWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { const FfxFloat32 fKernelSizeBias = 1.0f + (1.0f / FfxFloat32x2(DownscaleFactor()) - 1.0f).x; @@ -59,6 +79,225 @@ FfxFloat32x3 LoadPreparedColor(FfxInt32x2 iSamplePos) return fPreparedYCoCg; } +#if FFX_HALF && (FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +#define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1 +#else +#define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0 +#endif + +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + +void LoadPreparedColorPairedRgb(FFX_PARAMETER_OUT FFX_MIN16_F2 r, + FFX_PARAMETER_OUT FFX_MIN16_F2 g, + FFX_PARAMETER_OUT FFX_MIN16_F2 b, + FfxInt32x2 iSamplePos0, + FfxInt32x2 iSamplePos1) +{ + const FFX_MIN16_F3 sample0 = FFX_MIN16_F3(LoadInputColor(iSamplePos0)); + const FFX_MIN16_F3 sample1 = FFX_MIN16_F3(LoadInputColor(iSamplePos1)); + + r = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.r, sample1.r)); + g = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.g, sample1.g)); + b = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.b, sample1.b)); + + r = FFX_MIN16_F2(r * Exposure()); + g = FFX_MIN16_F2(g * Exposure()); + b = FFX_MIN16_F2(b * Exposure()); +} + +void TonemapPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b) +{ + FFX_MIN16_F2 denomF16 = ffxMax(ffxMax(ffxMax(0.0, r), g), b) + FFX_MIN16_F2(1.0, 1.0); + + // NOTE: expect 2 x v_cvt_f32_f16 + FfxFloat32x2 denomF32 = FfxFloat32x2(denomF16); + // NOTE: expect 2 x v_rcp_f32 + FfxFloat32x2 normF32 = FfxFloat32x2(1.0, 1.0) / denomF32; + // NOTE: expect 2 x v_cvt_f16_f32 + FFX_MIN16_F2 normF16 = FFX_MIN16_F2(normF32); + + r *= normF16; + g *= normF16; + b *= normF16; +} + +void RGBToYCoCgPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b) +{ + /** + * NOTE: given the following conversion + * + * fYCoCg = FfxFloat32x3( + * 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, + * 0.5f * fRgb.r - 0.5f * fRgb.b, + * -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); + * + * it's possible to notice that we can compute: + * RplusBdiv4 = 0.25 * (R + B) + * + * so everything else is computed in 3 instructions + * Y = G * 0.5 + RplusBdiv4 + * Co = 2 * RplusBdiv4 - G + * Cg = G * 0.5 - RplusBdiv4 + */ + + // NOTE: expect v_pk_add_f32 + v_pk_mul_f32 + FFX_MIN16_F2 RplusBdiv4 = (r + b) * 0.25; + FFX_MIN16_F2 G = g; + FFX_MIN16_F2 B = b; + + // NOTE: expect 3x v_pk_fma_f32 + r = G * 0.5 + RplusBdiv4; + g = RplusBdiv4 * 2.0 - B; + b = G * 0.5 - RplusBdiv4; +} + +FFX_MIN16_F2 Compute3x3SamplesMinMaxPaired(FFX_PARAMETER_IN FFX_MIN16_F2 sampleCenter, + FFX_PARAMETER_IN FFX_MIN16_F2 sample0, + FFX_PARAMETER_IN FFX_MIN16_F2 sample1, + FFX_PARAMETER_IN FFX_MIN16_F2 sample2, + FFX_PARAMETER_IN FFX_MIN16_F2 sample3) +{ + FFX_MIN16_F2 twoMinValues = ffxMin(ffxMin(sample0, sample1), ffxMin(sample2, sample3)); + FFX_MIN16_F2 twoMaxValues = ffxMax(ffxMax(sample0, sample1), ffxMax(sample2, sample3)); + + return FFX_MIN16_F2( + ffxMin3Half(twoMinValues.x, twoMinValues.y, sampleCenter.x), + ffxMax3Half(twoMaxValues.x, twoMaxValues.y, sampleCenter.x) + ); +} + + +FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y) +{ + uint lo = x ? 0x00003c00 : 0x00000000; + uint hi = y ? 0x3c000000 : 0x00000000; + return FFX_MIN16_F2(__XB_AsHalf(lo).x, __XB_AsHalf(hi).y); +} + +struct PairedRectificationBoxAndAccumulatedColorAndWeight +{ + FFX_MIN16_F2 boxCenterR; + FFX_MIN16_F2 boxCenterG; + FFX_MIN16_F2 boxCenterB; + + FFX_MIN16_F2 boxVecR; + FFX_MIN16_F2 boxVecG; + FFX_MIN16_F2 boxVecB; + + FFX_MIN16_F2 fBoxCenterWeight; + + FFX_MIN16_F2 fColorR; + FFX_MIN16_F2 fColorG; + FFX_MIN16_F2 fColorB; + FFX_MIN16_F2 fWeight; + + FFX_MIN16_F fKernelBiasSq; + FFX_MIN16_F fRectificationCurveBias; + + void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias) + { + fKernelBiasSq = FFX_MIN16_F(kernelBias * kernelBias); + fRectificationCurveBias = FFX_MIN16_F(rectificationCurveBias); + } + + void initUpscaledColor(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = FFX_MIN16_F2( + PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq).x, + 0.0 + ); + #else + #error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" + #endif + const FFX_MIN16_F2 fSampleWeight = fOnScreenWeight * LanczosUpsampleWeight; + + fColorR = sampleR * fSampleWeight; + fColorG = sampleG * fSampleWeight; + fColorB = sampleB * fSampleWeight; + fWeight = fSampleWeight; + } + + void initBox(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + const FFX_MIN16_F2 fBoxSampleWeight = FFX_MIN16_F2( + exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight, + 0.0 + ); + + FFX_MIN16_F2 weightedSampleR = sampleR * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleG = sampleG * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleB = sampleB * fBoxSampleWeight; + + boxCenterR = weightedSampleR; + boxCenterG = weightedSampleG; + boxCenterB = weightedSampleB; + + boxVecR = sampleR * weightedSampleR; + boxVecG = sampleG * weightedSampleG; + boxVecB = sampleB * weightedSampleB; + + fBoxCenterWeight = fBoxSampleWeight; + } + + void addUpscaledColorSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); + #else + #error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" + #endif + const FFX_MIN16_F2 fSampleWeight = fOnScreenWeight * LanczosUpsampleWeight; + + fColorR += sampleR * fSampleWeight; + fColorG += sampleG * fSampleWeight; + fColorB += sampleB * fSampleWeight; + fWeight += fSampleWeight; + } + + void addBoxSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + // NOTE: ideally expect here 2x v_fma_mix + 2x v_exp_f32 + 2x v_fma_mix + const FFX_MIN16_F2 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight; + + FFX_MIN16_F2 weightedSampleR = sampleR * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleG = sampleG * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleB = sampleB * fBoxSampleWeight; + + boxCenterR += weightedSampleR; + boxCenterG += weightedSampleG; + boxCenterB += weightedSampleB; + + boxVecR += sampleR * weightedSampleR; + boxVecG += sampleG * weightedSampleG; + boxVecB += sampleB * weightedSampleB; + + fBoxCenterWeight += fBoxSampleWeight; + } + + void finalizeUpscaledColor(FFX_PARAMETER_OUT FfxFloat32x4 upscaledColorAndWeight) + { + upscaledColorAndWeight.r = fColorR.x + fColorR.y; + upscaledColorAndWeight.g = fColorG.x + fColorG.y; + upscaledColorAndWeight.b = fColorB.x + fColorB.y; + + upscaledColorAndWeight.a = fWeight.x + fWeight.y; + } + + void finalizeBox(FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecR, + FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecG, + FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecB, + FFX_PARAMETER_OUT FfxFloat32 boxCenterWeight) + { + boxCenterAndVecR = FfxFloat32x2(boxCenterR.x + boxCenterR.y, boxVecR.x + boxVecR.y); + boxCenterAndVecG = FfxFloat32x2(boxCenterG.x + boxCenterG.y, boxVecG.x + boxVecG.y); + boxCenterAndVecB = FfxFloat32x2(boxCenterB.x + boxCenterB.y, boxVecB.x + boxVecB.y); + + boxCenterWeight = fBoxCenterWeight.x + fBoxCenterWeight.y; + } +}; +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) @@ -82,6 +321,90 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F const FfxBoolean bIsInitialSample = (params.fAccumulation == 0.0f); +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf(bFlipCol ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf(bFlipRow ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + + typedef FfxInt32 FfxTexCoordI; + typedef FfxInt32x2 FfxTexCoordI2; + + const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2(RenderSize()) - FfxTexCoordI2(1, 1); + + const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX01.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX01.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX23.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX23.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY01.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY01.y, 0, renderSizeLastTexelCoord.y) + ); + + const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY23.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY23.y, 0, renderSizeLastTexelCoord.y) + ); + + FFX_MIN16_F2 TopCornerR, BotCornerR, HorzR, VertR, CenterR; + FFX_MIN16_F2 TopCornerG, BotCornerG, HorzG, VertG, CenterG; + FFX_MIN16_F2 TopCornerB, BotCornerB, HorzB, VertB, CenterB; + + LoadPreparedColorPairedRgb(TopCornerR, TopCornerG, TopCornerB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.x), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.x) + ); + + LoadPreparedColorPairedRgb(BotCornerR, BotCornerG, BotCornerB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.x), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.x) + ); + + LoadPreparedColorPairedRgb(HorzR, HorzG, HorzB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.y), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.y) + ); + + LoadPreparedColorPairedRgb(VertR, VertG, VertB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.x), + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.x) + ); + + // NOTE: duplicated data + LoadPreparedColorPairedRgb(CenterR, CenterG, CenterB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y), + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y) + ); + + #if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + if (bIsInitialSample) + { + TonemapPaired(TopCornerR, TopCornerG, TopCornerB); + TonemapPaired(BotCornerR, BotCornerG, BotCornerB); + TonemapPaired(HorzR, HorzG, HorzB); + TonemapPaired(VertR, VertG, VertB); + TonemapPaired(CenterR, CenterG, CenterB); + } + #endif + + RGBToYCoCgPaired(TopCornerR, TopCornerG, TopCornerB); + RGBToYCoCgPaired(BotCornerR, BotCornerG, BotCornerB); + RGBToYCoCgPaired(HorzR, HorzG, HorzB); + RGBToYCoCgPaired(VertR, VertG, VertB); + RGBToYCoCgPaired(CenterR, CenterG, CenterB); + +#else FfxFloat32x3 fSamples[9]; FfxInt32 iSampleIndex = 0; @@ -110,6 +433,8 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F } #endif +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Identify how much of each upsampled color to be used for this frame const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight(params, data); const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); @@ -122,6 +447,139 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMin, fKernelBiasMax, fKernelBiasWeight); +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const bool coordX0OnScreen = iSrcSamplePosX01.x == iSrcSamplePosX01Clamped.x; + const bool coordX1OnScreen = iSrcSamplePosX01.y == iSrcSamplePosX01Clamped.y; + const bool coordX2OnScreen = iSrcSamplePosX23.x == iSrcSamplePosX23Clamped.x; + + const bool coordY0OnScreen = iSrcSamplePosY01.x == iSrcSamplePosY01Clamped.x; + const bool coordY1OnScreen = iSrcSamplePosY01.y == iSrcSamplePosY01Clamped.y; + const bool coordY2OnScreen = iSrcSamplePosY23.x == iSrcSamplePosY23Clamped.x; + + const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2(fBaseSampleOffset); + + const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf.xx + fSampleOffsetX02; + const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf.yy + fSampleOffsetY02; + + const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02; + const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02; + const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf; + + const FfxFloat32 fRectificationCurveBias = -2.3f; + PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox; + pairedBox.setKernelBiasAndRectificationCurveBias(fKernelBias, fRectificationCurveBias); + + // init by o o o + // o x o + // o o o + pairedBox.initBox( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + Bool2ToFloat16x2(coordX1OnScreen && coordY1OnScreen, false).x, + CenterR, CenterG, CenterB + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + Bool2ToFloat16x2(coordX0OnScreen && coordY0OnScreen, coordX2OnScreen && coordY0OnScreen), + TopCornerR, TopCornerG, TopCornerB + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY2OnScreen, coordX2OnScreen && coordY2OnScreen), + BotCornerR, BotCornerG, BotCornerB + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY1OnScreen, coordX2OnScreen && coordY1OnScreen), + HorzR, HorzG, HorzB + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addBoxSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + Bool2ToFloat16x2(coordX1OnScreen && coordY0OnScreen, coordX1OnScreen && coordY2OnScreen), + VertR, VertG, VertB + ); + + FfxFloat32x2 boxCenterAndVecR, boxCenterAndVecG, boxCenterAndVecB; + FfxFloat32 boxCenterWeight; + pairedBox.finalizeBox(boxCenterAndVecR, boxCenterAndVecG, boxCenterAndVecB, boxCenterWeight); + + if (!bIsInitialSample) + { + pairedBox.initUpscaledColor( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + Bool2ToFloat16x2(coordX1OnScreen && coordY1OnScreen, false).x, + CenterR, CenterG, CenterB + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + Bool2ToFloat16x2(coordX0OnScreen && coordY0OnScreen, coordX2OnScreen && coordY0OnScreen), + TopCornerR, TopCornerG, TopCornerB + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY2OnScreen, coordX2OnScreen && coordY2OnScreen), + BotCornerR, BotCornerG, BotCornerB + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY1OnScreen, coordX2OnScreen && coordY1OnScreen), + HorzR, HorzG, HorzB + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + Bool2ToFloat16x2(coordX1OnScreen && coordY0OnScreen, coordX1OnScreen && coordY2OnScreen), + VertR, VertG, VertB + ); + + FfxFloat32x4 upscaledColorAndWeight = 0.0; + pairedBox.finalizeUpscaledColor(upscaledColorAndWeight); + + data.fUpsampledColor = FfxFloat32x3(upscaledColorAndWeight.rgb); + data.fUpsampledWeight = FfxFloat32(upscaledColorAndWeight.w); + } + + FFX_MIN16_F2 aabbMinMaxR = Compute3x3SamplesMinMaxPaired(CenterR, TopCornerR, BotCornerR, HorzR, VertR); + FFX_MIN16_F2 aabbMinMaxG = Compute3x3SamplesMinMaxPaired(CenterG, TopCornerG, BotCornerG, HorzG, VertG); + FFX_MIN16_F2 aabbMinMaxB = Compute3x3SamplesMinMaxPaired(CenterB, TopCornerB, BotCornerB, HorzB, VertB); + + data.clippingBox.boxCenter = FfxFloat32x3(boxCenterAndVecR.x, boxCenterAndVecG.x, boxCenterAndVecB.x); + data.clippingBox.boxVec = FfxFloat32x3(boxCenterAndVecR.y, boxCenterAndVecG.y, boxCenterAndVecB.y); + data.clippingBox.aabbMin = FfxFloat32x3(aabbMinMaxR.x, aabbMinMaxG.x, aabbMinMaxB.x); + data.clippingBox.aabbMax = FfxFloat32x3(aabbMinMaxR.y, aabbMinMaxG.y, aabbMinMaxB.y); + data.clippingBox.fBoxCenterWeight = FfxFloat32(boxCenterWeight); +#else iSampleIndex = 0; @@ -158,6 +616,8 @@ void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, F ++iSampleIndex; } } + +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS RectificationBoxComputeVarianceBoxData(data.clippingBox); diff --git a/Packages/fidelityfx.fsr/package.json b/Packages/fidelityfx.fsr/package.json index d672c3f..7cba393 100644 --- a/Packages/fidelityfx.fsr/package.json +++ b/Packages/fidelityfx.fsr/package.json @@ -1,6 +1,6 @@ { "name": "fidelityfx.fsr", - "version": "1.0.0", + "version": "1.0.1", "displayName": "FidelityFX FSR", "description": "FidelityFX Super Resolution 2/3 Upscaler core assets", "unity": "2020.1", diff --git a/README.md b/README.md index 9897dd8..3b66a06 100644 --- a/README.md +++ b/README.md @@ -165,9 +165,6 @@ Dynamic resolution works really well in combination with FSR3 Upscaler. Any run- ## Known issues -- Enabling Auto Exposure causes artifacting in OpenGL Core on Nvidia GPUs. - It's uncertain what is causing this. Further investigation is required. - Workaround: disable Auto Exposure on affected platforms. - Texture mipmap bias adjustment is not working on MacOS Metal. This causes blurry textures as the internal render resolution is lowered. This is a Unity issue of some sort. Workaround: no known workaround yet.