From a19906df2a0502b18df0be615f20e152aea853eb Mon Sep 17 00:00:00 2001 From: Nico de Poel Date: Tue, 19 Jul 2022 22:42:03 +0200 Subject: [PATCH] Initial import --- .gitignore | 20 + Assets/Scenes.meta | 8 + Assets/Scenes/SampleScene.unity | 267 ++ Assets/Scenes/SampleScene.unity.meta | 7 + Assets/Shaders.meta | 8 + Assets/Shaders/FSR2.meta | 8 + Assets/Shaders/FSR2/ffx_common_types.h | 426 +++ Assets/Shaders/FSR2/ffx_common_types.h.meta | 27 + Assets/Shaders/FSR2/ffx_core.h | 52 + Assets/Shaders/FSR2/ffx_core.h.meta | 27 + Assets/Shaders/FSR2/ffx_core_cpu.h | 328 ++ Assets/Shaders/FSR2/ffx_core_cpu.h.meta | 27 + Assets/Shaders/FSR2/ffx_core_gpu_common.h | 2784 +++++++++++++++ .../Shaders/FSR2/ffx_core_gpu_common.h.meta | 27 + .../Shaders/FSR2/ffx_core_gpu_common_half.h | 2978 +++++++++++++++++ .../FSR2/ffx_core_gpu_common_half.h.meta | 27 + Assets/Shaders/FSR2/ffx_core_hlsl.h | 1527 +++++++++ Assets/Shaders/FSR2/ffx_core_hlsl.h.meta | 27 + Assets/Shaders/FSR2/ffx_core_portability.h | 50 + .../Shaders/FSR2/ffx_core_portability.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr1.h | 1246 +++++++ Assets/Shaders/FSR2/ffx_fsr1.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_accumulate.h | 279 ++ .../Shaders/FSR2/ffx_fsr2_accumulate.h.meta | 27 + .../FSR2/ffx_fsr2_accumulate_pass.hlsl | 90 + .../FSR2/ffx_fsr2_accumulate_pass.hlsl.meta | 7 + .../ffx_fsr2_autogen_reactive_pass.compute | 87 + ...fx_fsr2_autogen_reactive_pass.compute.meta | 8 + .../FSR2/ffx_fsr2_callbacks_hlsl.cginc | 933 ++++++ .../FSR2/ffx_fsr2_callbacks_hlsl.cginc.meta | 7 + Assets/Shaders/FSR2/ffx_fsr2_common.cginc | 356 ++ .../Shaders/FSR2/ffx_fsr2_common.cginc.meta | 7 + .../FSR2/ffx_fsr2_compute_luminance_pyramid.h | 188 ++ .../ffx_fsr2_compute_luminance_pyramid.h.meta | 27 + ...x_fsr2_compute_luminance_pyramid_pass.hlsl | 164 + ...2_compute_luminance_pyramid_pass.hlsl.meta | 7 + Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h | 93 + .../Shaders/FSR2/ffx_fsr2_depth_clip.h.meta | 27 + .../FSR2/ffx_fsr2_depth_clip_pass.hlsl | 63 + .../FSR2/ffx_fsr2_depth_clip_pass.hlsl.meta | 7 + Assets/Shaders/FSR2/ffx_fsr2_lock.h | 171 + Assets/Shaders/FSR2/ffx_fsr2_lock.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl | 66 + .../Shaders/FSR2/ffx_fsr2_lock_pass.hlsl.meta | 7 + .../FSR2/ffx_fsr2_postprocess_lock_status.h | 85 + .../ffx_fsr2_postprocess_lock_status.h.meta | 27 + .../FSR2/ffx_fsr2_prepare_input_color.h | 83 + .../FSR2/ffx_fsr2_prepare_input_color.h.meta | 27 + .../ffx_fsr2_prepare_input_color_pass.hlsl | 64 + ...fx_fsr2_prepare_input_color_pass.hlsl.meta | 7 + Assets/Shaders/FSR2/ffx_fsr2_rcas.h | 105 + Assets/Shaders/FSR2/ffx_fsr2_rcas.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl | 88 + .../Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl.meta | 7 + ...ruct_dilated_velocity_and_previous_depth.h | 135 + ...dilated_velocity_and_previous_depth.h.meta | 27 + ..._fsr2_reconstruct_previous_depth_pass.hlsl | 66 + ..._reconstruct_previous_depth_pass.hlsl.meta | 7 + Assets/Shaders/FSR2/ffx_fsr2_reproject.h | 93 + Assets/Shaders/FSR2/ffx_fsr2_reproject.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_resources.h | 89 + Assets/Shaders/FSR2/ffx_fsr2_resources.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_sample.h | 494 +++ Assets/Shaders/FSR2/ffx_fsr2_sample.h.meta | 27 + Assets/Shaders/FSR2/ffx_fsr2_upsample.h | 148 + Assets/Shaders/FSR2/ffx_fsr2_upsample.h.meta | 27 + Assets/Shaders/FSR2/ffx_spd.h | 936 ++++++ Assets/Shaders/FSR2/ffx_spd.h.meta | 27 + FSR2Test.sln | 17 + Packages/manifest.json | 45 + Packages/packages-lock.json | 413 +++ ProjectSettings/AudioManager.asset | 19 + ProjectSettings/ClusterInputManager.asset | 6 + ProjectSettings/DynamicsManager.asset | 34 + ProjectSettings/EditorBuildSettings.asset | 8 + ProjectSettings/EditorSettings.asset | 30 + .../GameCoreScarlettSettings.asset | 11 + ProjectSettings/GameCoreXboxOneSettings.asset | 14 + ProjectSettings/GraphicsSettings.asset | 63 + ProjectSettings/InputManager.asset | 295 ++ ProjectSettings/MemorySettings.asset | 35 + ProjectSettings/NavMeshAreas.asset | 91 + ProjectSettings/PS5Settings.json | 56 + ProjectSettings/PackageManagerSettings.asset | 35 + .../Settings.json | 7 + ProjectSettings/Physics2DSettings.asset | 56 + ProjectSettings/PresetManager.asset | 7 + ProjectSettings/ProjectSettings.asset | 723 ++++ ProjectSettings/ProjectVersion.txt | 2 + ProjectSettings/QualitySettings.asset | 232 ++ ProjectSettings/ScarlettGame.config | 6 + ProjectSettings/TagManager.asset | 43 + ProjectSettings/TimeManager.asset | 9 + ProjectSettings/UnityConnectSettings.asset | 35 + ProjectSettings/VFXManager.asset | 12 + ProjectSettings/VersionControlSettings.asset | 8 + ProjectSettings/XRSettings.asset | 10 + ProjectSettings/XboxOneGame.config | 6 + ProjectSettings/boot.config | 0 99 files changed, 17551 insertions(+) create mode 100644 .gitignore create mode 100644 Assets/Scenes.meta create mode 100644 Assets/Scenes/SampleScene.unity create mode 100644 Assets/Scenes/SampleScene.unity.meta create mode 100644 Assets/Shaders.meta create mode 100644 Assets/Shaders/FSR2.meta create mode 100644 Assets/Shaders/FSR2/ffx_common_types.h create mode 100644 Assets/Shaders/FSR2/ffx_common_types.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core.h create mode 100644 Assets/Shaders/FSR2/ffx_core.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core_cpu.h create mode 100644 Assets/Shaders/FSR2/ffx_core_cpu.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core_gpu_common.h create mode 100644 Assets/Shaders/FSR2/ffx_core_gpu_common.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core_gpu_common_half.h create mode 100644 Assets/Shaders/FSR2/ffx_core_gpu_common_half.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core_hlsl.h create mode 100644 Assets/Shaders/FSR2/ffx_core_hlsl.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_core_portability.h create mode 100644 Assets/Shaders/FSR2/ffx_core_portability.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr1.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr1.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_accumulate.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_accumulate.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_common.cginc create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_common.cginc.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_lock.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_lock.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_rcas.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_rcas.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reproject.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_reproject.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_resources.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_resources.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_sample.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_sample.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_upsample.h create mode 100644 Assets/Shaders/FSR2/ffx_fsr2_upsample.h.meta create mode 100644 Assets/Shaders/FSR2/ffx_spd.h create mode 100644 Assets/Shaders/FSR2/ffx_spd.h.meta create mode 100644 FSR2Test.sln create mode 100644 Packages/manifest.json create mode 100644 Packages/packages-lock.json create mode 100644 ProjectSettings/AudioManager.asset create mode 100644 ProjectSettings/ClusterInputManager.asset create mode 100644 ProjectSettings/DynamicsManager.asset create mode 100644 ProjectSettings/EditorBuildSettings.asset create mode 100644 ProjectSettings/EditorSettings.asset create mode 100644 ProjectSettings/GameCoreScarlettSettings.asset create mode 100644 ProjectSettings/GameCoreXboxOneSettings.asset create mode 100644 ProjectSettings/GraphicsSettings.asset create mode 100644 ProjectSettings/InputManager.asset create mode 100644 ProjectSettings/MemorySettings.asset create mode 100644 ProjectSettings/NavMeshAreas.asset create mode 100644 ProjectSettings/PS5Settings.json create mode 100644 ProjectSettings/PackageManagerSettings.asset create mode 100644 ProjectSettings/Packages/com.unity.testtools.codecoverage/Settings.json create mode 100644 ProjectSettings/Physics2DSettings.asset create mode 100644 ProjectSettings/PresetManager.asset create mode 100644 ProjectSettings/ProjectSettings.asset create mode 100644 ProjectSettings/ProjectVersion.txt create mode 100644 ProjectSettings/QualitySettings.asset create mode 100644 ProjectSettings/ScarlettGame.config create mode 100644 ProjectSettings/TagManager.asset create mode 100644 ProjectSettings/TimeManager.asset create mode 100644 ProjectSettings/UnityConnectSettings.asset create mode 100644 ProjectSettings/VFXManager.asset create mode 100644 ProjectSettings/VersionControlSettings.asset create mode 100644 ProjectSettings/XRSettings.asset create mode 100644 ProjectSettings/XboxOneGame.config create mode 100644 ProjectSettings/boot.config diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91bf611 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +Library/ +Temp/ +UniQuake.sln +UserSettings/ +.vs/ +Debug/ +Release/ +.idea/ +*.csproj +Logs/ +Build/ +Build64/ +mods/ +*.DotSettings +*.orig +Debug Portable/ +Assets/StreamingAssets/ +Build*/ +*_Debug/ +Data/ diff --git a/Assets/Scenes.meta b/Assets/Scenes.meta new file mode 100644 index 0000000..83c741b --- /dev/null +++ b/Assets/Scenes.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 6ea315d0fd7389c41b19996891e99ae3 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Scenes/SampleScene.unity b/Assets/Scenes/SampleScene.unity new file mode 100644 index 0000000..2221b04 --- /dev/null +++ b/Assets/Scenes/SampleScene.unity @@ -0,0 +1,267 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!29 &1 +OcclusionCullingSettings: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_OcclusionBakeSettings: + smallestOccluder: 5 + smallestHole: 0.25 + backfaceThreshold: 100 + m_SceneGUID: 00000000000000000000000000000000 + m_OcclusionCullingData: {fileID: 0} +--- !u!104 &2 +RenderSettings: + m_ObjectHideFlags: 0 + serializedVersion: 9 + m_Fog: 0 + m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1} + m_FogMode: 3 + m_FogDensity: 0.01 + m_LinearFogStart: 0 + m_LinearFogEnd: 300 + m_AmbientSkyColor: {r: 0.212, g: 0.227, b: 0.259, a: 1} + m_AmbientEquatorColor: {r: 0.114, g: 0.125, b: 0.133, a: 1} + m_AmbientGroundColor: {r: 0.047, g: 0.043, b: 0.035, a: 1} + m_AmbientIntensity: 1 + m_AmbientMode: 0 + m_SubtractiveShadowColor: {r: 0.42, g: 0.478, b: 0.627, a: 1} + m_SkyboxMaterial: {fileID: 10304, guid: 0000000000000000f000000000000000, type: 0} + m_HaloStrength: 0.5 + m_FlareStrength: 1 + m_FlareFadeSpeed: 3 + m_HaloTexture: {fileID: 0} + m_SpotCookie: {fileID: 10001, guid: 0000000000000000e000000000000000, type: 0} + m_DefaultReflectionMode: 0 + m_DefaultReflectionResolution: 128 + m_ReflectionBounces: 1 + m_ReflectionIntensity: 1 + m_CustomReflection: {fileID: 0} + m_Sun: {fileID: 705507994} + m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1} + m_UseRadianceAmbientProbe: 0 +--- !u!157 &3 +LightmapSettings: + m_ObjectHideFlags: 0 + serializedVersion: 12 + m_GIWorkflowMode: 1 + m_GISettings: + serializedVersion: 2 + m_BounceScale: 1 + m_IndirectOutputScale: 1 + m_AlbedoBoost: 1 + m_EnvironmentLightingMode: 0 + m_EnableBakedLightmaps: 1 + m_EnableRealtimeLightmaps: 0 + m_LightmapEditorSettings: + serializedVersion: 12 + m_Resolution: 2 + m_BakeResolution: 40 + m_AtlasSize: 1024 + m_AO: 0 + m_AOMaxDistance: 1 + m_CompAOExponent: 1 + m_CompAOExponentDirect: 0 + m_ExtractAmbientOcclusion: 0 + m_Padding: 2 + m_LightmapParameters: {fileID: 0} + m_LightmapsBakeMode: 1 + m_TextureCompression: 1 + m_FinalGather: 0 + m_FinalGatherFiltering: 1 + m_FinalGatherRayCount: 256 + m_ReflectionCompression: 2 + m_MixedBakeMode: 2 + m_BakeBackend: 1 + m_PVRSampling: 1 + m_PVRDirectSampleCount: 32 + m_PVRSampleCount: 500 + m_PVRBounces: 2 + m_PVREnvironmentSampleCount: 500 + m_PVREnvironmentReferencePointCount: 2048 + m_PVRFilteringMode: 2 + m_PVRDenoiserTypeDirect: 0 + m_PVRDenoiserTypeIndirect: 0 + m_PVRDenoiserTypeAO: 0 + m_PVRFilterTypeDirect: 0 + m_PVRFilterTypeIndirect: 0 + m_PVRFilterTypeAO: 0 + m_PVREnvironmentMIS: 0 + m_PVRCulling: 1 + m_PVRFilteringGaussRadiusDirect: 1 + m_PVRFilteringGaussRadiusIndirect: 5 + m_PVRFilteringGaussRadiusAO: 2 + m_PVRFilteringAtrousPositionSigmaDirect: 0.5 + m_PVRFilteringAtrousPositionSigmaIndirect: 2 + m_PVRFilteringAtrousPositionSigmaAO: 1 + m_ExportTrainingData: 0 + m_TrainingDataDestination: TrainingData + m_LightProbeSampleCountMultiplier: 4 + m_LightingDataAsset: {fileID: 0} + m_LightingSettings: {fileID: 0} +--- !u!196 &4 +NavMeshSettings: + serializedVersion: 2 + m_ObjectHideFlags: 0 + m_BuildSettings: + serializedVersion: 2 + agentTypeID: 0 + agentRadius: 0.5 + agentHeight: 2 + agentSlope: 45 + agentClimb: 0.4 + ledgeDropHeight: 0 + maxJumpAcrossDistance: 0 + minRegionArea: 2 + manualCellSize: 0 + cellSize: 0.16666667 + manualTileSize: 0 + tileSize: 256 + accuratePlacement: 0 + debug: + m_Flags: 0 + m_NavMeshData: {fileID: 0} +--- !u!1 &705507993 +GameObject: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 6 + m_Component: + - component: {fileID: 705507995} + - component: {fileID: 705507994} + m_Layer: 0 + m_Name: Directional Light + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!108 &705507994 +Light: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 705507993} + m_Enabled: 1 + serializedVersion: 8 + m_Type: 1 + m_Color: {r: 1, g: 0.95686275, b: 0.8392157, a: 1} + m_Intensity: 1 + m_Range: 10 + m_SpotAngle: 30 + m_CookieSize: 10 + m_Shadows: + m_Type: 2 + m_Resolution: -1 + m_CustomResolution: -1 + m_Strength: 1 + m_Bias: 0.05 + m_NormalBias: 0.4 + m_NearPlane: 0.2 + m_Cookie: {fileID: 0} + m_DrawHalo: 0 + m_Flare: {fileID: 0} + m_RenderMode: 0 + m_CullingMask: + serializedVersion: 2 + m_Bits: 4294967295 + m_Lightmapping: 1 + m_LightShadowCasterMode: 0 + m_AreaSize: {x: 1, y: 1} + m_BounceIntensity: 1 + m_ColorTemperature: 6570 + m_UseColorTemperature: 0 + m_ShadowRadius: 0 + m_ShadowAngle: 0 +--- !u!4 &705507995 +Transform: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 705507993} + m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261} + m_LocalPosition: {x: 0, y: 3, z: 0} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 1 + m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0} +--- !u!1 &963194225 +GameObject: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 6 + m_Component: + - component: {fileID: 963194228} + - component: {fileID: 963194227} + - component: {fileID: 963194226} + m_Layer: 0 + m_Name: Main Camera + m_TagString: MainCamera + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!81 &963194226 +AudioListener: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 963194225} + m_Enabled: 1 +--- !u!20 &963194227 +Camera: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 963194225} + m_Enabled: 1 + serializedVersion: 2 + m_ClearFlags: 1 + m_BackGroundColor: {r: 0.19215687, g: 0.3019608, b: 0.4745098, a: 0} + m_projectionMatrixMode: 1 + m_SensorSize: {x: 36, y: 24} + m_LensShift: {x: 0, y: 0} + m_GateFitMode: 2 + m_FocalLength: 50 + m_NormalizedViewPortRect: + serializedVersion: 2 + x: 0 + y: 0 + width: 1 + height: 1 + near clip plane: 0.3 + far clip plane: 1000 + field of view: 60 + orthographic: 0 + orthographic size: 5 + m_Depth: -1 + m_CullingMask: + serializedVersion: 2 + m_Bits: 4294967295 + m_RenderingPath: -1 + m_TargetTexture: {fileID: 0} + m_TargetDisplay: 0 + m_TargetEye: 3 + m_HDR: 1 + m_AllowMSAA: 1 + m_AllowDynamicResolution: 0 + m_ForceIntoRT: 0 + m_OcclusionCulling: 1 + m_StereoConvergence: 10 + m_StereoSeparation: 0.022 +--- !u!4 &963194228 +Transform: + m_ObjectHideFlags: 0 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 963194225} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0, y: 1, z: -10} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 0 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} diff --git a/Assets/Scenes/SampleScene.unity.meta b/Assets/Scenes/SampleScene.unity.meta new file mode 100644 index 0000000..952bd1e --- /dev/null +++ b/Assets/Scenes/SampleScene.unity.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 9fc0d4010bbf28b4594072e72b8655ab +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders.meta b/Assets/Shaders.meta new file mode 100644 index 0000000..5008ba1 --- /dev/null +++ b/Assets/Shaders.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 899452302d6bfbd41905b6c899c7e545 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2.meta b/Assets/Shaders/FSR2.meta new file mode 100644 index 0000000..86884c9 --- /dev/null +++ b/Assets/Shaders/FSR2.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: fe1c2c243614a5a4db5ced9f9ea2d7c4 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_common_types.h b/Assets/Shaders/FSR2/ffx_common_types.h new file mode 100644 index 0000000..abcb979 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_common_types.h @@ -0,0 +1,426 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_COMMON_TYPES_H +#define FFX_COMMON_TYPES_H + +#if defined(FFX_CPU) +#define FFX_PARAMETER_IN +#define FFX_PARAMETER_OUT +#define FFX_PARAMETER_INOUT +#elif defined(FFX_HLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#elif defined(FFX_GLSL) +#define FFX_PARAMETER_IN in +#define FFX_PARAMETER_OUT out +#define FFX_PARAMETER_INOUT inout +#endif // #if defined(FFX_CPU) + +#if defined(FFX_CPU) +/// A typedef for a boolean value. +/// +/// @ingroup CPU +typedef bool FfxBoolean; + +/// A typedef for a unsigned 8bit integer. +/// +/// @ingroup CPU +typedef uint8_t FfxUInt8; + +/// A typedef for a unsigned 16bit integer. +/// +/// @ingroup CPU +typedef uint16_t FfxUInt16; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup CPU +typedef uint32_t FfxUInt32; + +/// A typedef for a unsigned 64bit integer. +/// +/// @ingroup CPU +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPU +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPU +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPU +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. +/// +/// @ingroup CPU +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPU +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup CPU +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPU +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPU +typedef float FfxFloat32x4[4]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup CPU +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup CPU +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPU +typedef uint32_t FfxUInt32x4[4]; +#endif // #if defined(FFX_CPU) + +#if defined(FFX_HLSL) +/// A typedef for a boolean value. +/// +/// @ingroup GPU +typedef bool FfxBoolean; + +#if FFX_HLSL_6_2 +typedef float32_t FfxFloat32; +typedef float32_t2 FfxFloat32x2; +typedef float32_t3 FfxFloat32x3; +typedef float32_t4 FfxFloat32x4; + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +typedef uint32_t FfxUInt32; +typedef uint32_t2 FfxUInt32x2; +typedef uint32_t3 FfxUInt32x3; +typedef uint32_t4 FfxUInt32x4; +typedef int32_t FfxInt32; +typedef int32_t2 FfxInt32x2; +typedef int32_t3 FfxInt32x3; +typedef int32_t4 FfxInt32x4; +#else +#define FfxFloat32 float +#define FfxFloat32x2 float2 +#define FfxFloat32x3 float3 +#define FfxFloat32x4 float4 + +/// A typedef for a unsigned 32bit integer. +/// +/// @ingroup GPU +typedef uint FfxUInt32; +typedef uint2 FfxUInt32x2; +typedef uint3 FfxUInt32x3; +typedef uint4 FfxUInt32x4; +typedef int FfxInt32; +typedef int2 FfxInt32x2; +typedef int3 FfxInt32x3; +typedef int4 FfxInt32x4; +#endif // #if defined(FFX_HLSL_6_2) + +#if FFX_HALF +#if FFX_HLSL_6_2 +typedef float16_t FfxFloat16; +typedef float16_t2 FfxFloat16x2; +typedef float16_t3 FfxFloat16x3; +typedef float16_t4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup GPU +typedef uint16_t FfxUInt16; +typedef uint16_t2 FfxUInt16x2; +typedef uint16_t3 FfxUInt16x3; +typedef uint16_t4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup GPU +typedef int16_t FfxInt16; +typedef int16_t2 FfxInt16x2; +typedef int16_t3 FfxInt16x3; +typedef int16_t4 FfxInt16x4; +#else +typedef min16float FfxFloat16; +typedef min16float2 FfxFloat16x2; +typedef min16float3 FfxFloat16x3; +typedef min16float4 FfxFloat16x4; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup GPU +typedef min16uint FfxUInt16; +typedef min16uint2 FfxUInt16x2; +typedef min16uint3 FfxUInt16x3; +typedef min16uint4 FfxUInt16x4; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup GPU +typedef min16int FfxInt16; +typedef min16int2 FfxInt16x2; +typedef min16int3 FfxInt16x3; +typedef min16int4 FfxInt16x4; +#endif // FFX_HLSL_6_2 +#endif // FFX_HALF +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) +/// A typedef for a boolean value. +/// +/// @ingroup GPU +#define FfxBoolean bool +#define FfxFloat32 float +#define FfxFloat32x2 vec2 +#define FfxFloat32x3 vec3 +#define FfxFloat32x4 vec4 +#define FfxUInt32 uint +#define FfxUInt32x2 uvec2 +#define FfxUInt32x3 uvec3 +#define FfxUInt32x4 uvec4 +#define FfxInt32 int +#define FfxInt32x2 ivec2 +#define FfxInt32x3 ivec3 +#define FfxInt32x4 ivec4 +#if FFX_HALF +#define FfxFloat16 float16_t +#define FfxFloat16x2 f16vec2 +#define FfxFloat16x3 f16vec3 +#define FfxFloat16x4 f16vec4 +#define FfxUInt16 uint16_t +#define FfxUInt16x2 u16vec2 +#define FfxUInt16x3 u16vec3 +#define FfxUInt16x4 u16vec4 +#define FfxInt16 int16_t +#define FfxInt16x2 i16vec2 +#define FfxInt16x3 i16vec3 +#define FfxInt16x4 i16vec4 +#endif // FFX_HALF +#endif // #if defined(FFX_GLSL) + +// Global toggles: +// #define FFX_HALF (1) +// #define FFX_HLSL_6_2 (1) + +#if FFX_HALF + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#if FFX_HLSL_6_2 + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#else //FFX_HLSL_6_2 + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) FFX_MIN16_SCALAR( TypeName, BaseComponentType ); +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); + +#endif //FFX_HLSL_6_2 + +#else //FFX_HALF + +#define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#define FFX_16BIT_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType TypeName; +#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector TypeName; +#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix TypeName; + +#endif //FFX_HALF + +#if defined(FFX_GPU) +// Common typedefs: +#if defined(FFX_HLSL) +FFX_MIN16_SCALAR( FFX_MIN16_F , float ); +FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_I, int ); +FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 ); + +FFX_MIN16_SCALAR( FFX_MIN16_U, uint ); +FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 ); +FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 ); +FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 ); + +FFX_16BIT_SCALAR( FFX_F16_t , float ); +FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 ); +FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 ); +FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 ); + +FFX_16BIT_SCALAR( FFX_I16_t, int ); +FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 ); +FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 ); +FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 ); + +FFX_16BIT_SCALAR( FFX_U16_t, uint ); +FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 ); +FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 ); +FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 ); + +#define TYPEDEF_MIN16_TYPES(Prefix) \ +typedef FFX_MIN16_F Prefix##_F; \ +typedef FFX_MIN16_F2 Prefix##_F2; \ +typedef FFX_MIN16_F3 Prefix##_F3; \ +typedef FFX_MIN16_F4 Prefix##_F4; \ +typedef FFX_MIN16_I Prefix##_I; \ +typedef FFX_MIN16_I2 Prefix##_I2; \ +typedef FFX_MIN16_I3 Prefix##_I3; \ +typedef FFX_MIN16_I4 Prefix##_I4; \ +typedef FFX_MIN16_U Prefix##_U; \ +typedef FFX_MIN16_U2 Prefix##_U2; \ +typedef FFX_MIN16_U3 Prefix##_U3; \ +typedef FFX_MIN16_U4 Prefix##_U4; + +#define TYPEDEF_16BIT_TYPES(Prefix) \ +typedef FFX_16BIT_F Prefix##_F; \ +typedef FFX_16BIT_F2 Prefix##_F2; \ +typedef FFX_16BIT_F3 Prefix##_F3; \ +typedef FFX_16BIT_F4 Prefix##_F4; \ +typedef FFX_16BIT_I Prefix##_I; \ +typedef FFX_16BIT_I2 Prefix##_I2; \ +typedef FFX_16BIT_I3 Prefix##_I3; \ +typedef FFX_16BIT_I4 Prefix##_I4; \ +typedef FFX_16BIT_U Prefix##_U; \ +typedef FFX_16BIT_U2 Prefix##_U2; \ +typedef FFX_16BIT_U3 Prefix##_U3; \ +typedef FFX_16BIT_U4 Prefix##_U4; + +#define TYPEDEF_FULL_PRECISION_TYPES(Prefix) \ +typedef FfxFloat32 Prefix##_F; \ +typedef FfxFloat32x2 Prefix##_F2; \ +typedef FfxFloat32x3 Prefix##_F3; \ +typedef FfxFloat32x4 Prefix##_F4; \ +typedef FfxInt32 Prefix##_I; \ +typedef FfxInt32x2 Prefix##_I2; \ +typedef FfxInt32x3 Prefix##_I3; \ +typedef FfxInt32x4 Prefix##_I4; \ +typedef FfxUInt32 Prefix##_U; \ +typedef FfxUInt32x2 Prefix##_U2; \ +typedef FfxUInt32x3 Prefix##_U3; \ +typedef FfxUInt32x4 Prefix##_U4; +#endif // #if defined(FFX_HLSL) + +#if defined(FFX_GLSL) + +#if FFX_HALF + +#define FFX_MIN16_F float16_t +#define FFX_MIN16_F2 f16vec2 +#define FFX_MIN16_F3 f16vec3 +#define FFX_MIN16_F4 f16vec4 + +#define FFX_MIN16_I int16_t +#define FFX_MIN16_I2 i16vec2 +#define FFX_MIN16_I3 i16vec3 +#define FFX_MIN16_I4 i16vec4 + +#define FFX_MIN16_U uint16_t +#define FFX_MIN16_U2 u16vec2 +#define FFX_MIN16_U3 u16vec3 +#define FFX_MIN16_U4 u16vec4 + +#define FFX_16BIT_F float16_t +#define FFX_16BIT_F2 f16vec2 +#define FFX_16BIT_F3 f16vec3 +#define FFX_16BIT_F4 f16vec4 + +#define FFX_16BIT_I int16_t +#define FFX_16BIT_I2 i16vec2 +#define FFX_16BIT_I3 i16vec3 +#define FFX_16BIT_I4 i16vec4 + +#define FFX_16BIT_U uint16_t +#define FFX_16BIT_U2 u16vec2 +#define FFX_16BIT_U3 u16vec3 +#define FFX_16BIT_U4 u16vec4 + +#else // FFX_HALF + +#define FFX_MIN16_F float +#define FFX_MIN16_F2 vec2 +#define FFX_MIN16_F3 vec3 +#define FFX_MIN16_F4 vec4 + +#define FFX_MIN16_I int +#define FFX_MIN16_I2 ivec2 +#define FFX_MIN16_I3 ivec3 +#define FFX_MIN16_I4 ivec4 + +#define FFX_MIN16_U uint +#define FFX_MIN16_U2 uvec2 +#define FFX_MIN16_U3 uvec3 +#define FFX_MIN16_U4 uvec4 + +#define FFX_16BIT_F float +#define FFX_16BIT_F2 vec2 +#define FFX_16BIT_F3 vec3 +#define FFX_16BIT_F4 vec4 + +#define FFX_16BIT_I int +#define FFX_16BIT_I2 ivec2 +#define FFX_16BIT_I3 ivec3 +#define FFX_16BIT_I4 ivec4 + +#define FFX_16BIT_U uint +#define FFX_16BIT_U2 uvec2 +#define FFX_16BIT_U3 uvec3 +#define FFX_16BIT_U4 uvec4 + +#endif // FFX_HALF + +#endif // #if defined(FFX_GLSL) + +#endif // #if defined(FFX_GPU) +#endif // #ifndef FFX_COMMON_TYPES_H diff --git a/Assets/Shaders/FSR2/ffx_common_types.h.meta b/Assets/Shaders/FSR2/ffx_common_types.h.meta new file mode 100644 index 0000000..e0d5ef5 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_common_types.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: cdf3230fbd57f114488da1c8ae7a85f3 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core.h b/Assets/Shaders/FSR2/ffx_core.h new file mode 100644 index 0000000..3a66f44 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core.h @@ -0,0 +1,52 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// @defgroup Core +/// @defgroup HLSL +/// @defgroup GLSL +/// @defgroup GPU +/// @defgroup CPU +/// @defgroup CAS +/// @defgroup FSR1 + +#if !defined(FFX_CORE_H) +#define FFX_CORE_H + +#include "ffx_common_types.h" + +#if defined(FFX_CPU) + #include "ffx_core_cpu.h" +#endif // #if defined(FFX_CPU) + +#if defined(FFX_GLSL) && defined(FFX_GPU) + #include "ffx_core_glsl.h" +#endif // #if defined(FFX_GLSL) && defined(FFX_GPU) + +#if defined(FFX_HLSL) && defined(FFX_GPU) + #include "ffx_core_hlsl.h" +#endif // #if defined(FFX_HLSL) && defined(FFX_GPU) + +#if defined(FFX_GPU) + #include "ffx_core_gpu_common.h" + #include "ffx_core_gpu_common_half.h" + #include "ffx_core_portability.h" +#endif // #if defined(FFX_GPU) +#endif // #if !defined(FFX_CORE_H) \ No newline at end of file diff --git a/Assets/Shaders/FSR2/ffx_core.h.meta b/Assets/Shaders/FSR2/ffx_core.h.meta new file mode 100644 index 0000000..0ea5e13 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 646b382e52e76844d928b5530e3e7586 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core_cpu.h b/Assets/Shaders/FSR2/ffx_core_cpu.h new file mode 100644 index 0000000..b753459 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_cpu.h @@ -0,0 +1,328 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. +/// +/// @ingroup CPU +#define FFX_TRUE (1) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup CPU +#define FFX_FALSE (0) + +#if !defined(FFX_STATIC) +/// A define to abstract declaration of static variables and functions. +/// +/// @ingroup CPU +#define FFX_STATIC static +#endif // #if !defined(FFX_STATIC) + +/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer. +/// +/// @param [in] x A 32bit floating value. +/// +/// @returns +/// An unsigned 32bit integer value containing the bit pattern of x. +/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = x; + return bits.u; +} + +FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return a[0] * b[0] + a[1] * b[1]; +} + +FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; +} + +FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the GLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return y * t + (-x * t + x); +} + +/// Compute the reciprocal of a value. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal value of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a) +{ + return 1.0f / a; +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a) +{ + return a - floor(a); +} + +/// Compute the reciprocal square root of a value. +/// +/// @param [in] x The value to compute the reciprocal for. +/// +/// @returns +/// The reciprocal square root value of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a) +{ + return ffxReciprocal(ffxSqrt(a)); +} + +FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return x < y ? x : y; +} + +FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return x > y ? x : y; +} + +FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return x > y ? x : y; +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup CPU +FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a) +{ + return ffxMin(1.0f, ffxMax(0.0f, a)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] + b; + d[1] = a[1] + b; + d[2] = a[2] + b; + return; +} + +FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = a[0]; + d[1] = a[1]; + d[2] = a[2]; + return; +} + +FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d[0] = a[0] * b[0]; + d[1] = a[1] * b[1]; + d[2] = a[2] * b[2]; + return; +} + +FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d[0] = a[0] * b; + d[1] = a[1] * b; + d[2] = a[2] * b; + return; +} + +FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d[0] = ffxReciprocal(a[0]); + d[1] = ffxReciprocal(a[1]); + d[2] = ffxReciprocal(a[2]); + return; +} + +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. +/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) +{ + static FfxUInt16 base[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, + 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, + 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, + 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, + 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, + 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, + 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff + }; + + static FfxUInt8 shift[512] = { + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18 + }; + + union + { + FfxFloat32 f; + FfxUInt32 u; + } bits; + + bits.f = f; + FfxUInt32 u = bits.u; + FfxUInt32 i = u >> 23; + return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); +} + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup CPU +FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a) +{ + return f32tof16(a[0]) + (f32tof16(a[1]) << 16); +} diff --git a/Assets/Shaders/FSR2/ffx_core_cpu.h.meta b/Assets/Shaders/FSR2/ffx_core_cpu.h.meta new file mode 100644 index 0000000..3fcfdd1 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_cpu.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 5e03235b410e0b344b3de52efe50de9b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core_gpu_common.h b/Assets/Shaders/FSR2/ffx_core_gpu_common.h new file mode 100644 index 0000000..3a49c55 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_gpu_common.h @@ -0,0 +1,2784 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for a true value in a boolean expression. +/// +/// @ingroup GPU +#define FFX_TRUE (true) + +/// A define for a false value in a boolean expression. +/// +/// @ingroup GPU +#define FFX_FALSE (false) + +/// A define value for positive infinity. +/// +/// @ingroup GPU +#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) + +/// A define value for negative infinity. +/// +/// @ingroup GPU +#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) + +/// A define value for PI. +/// +/// @ingroup GPU +#define FFX_PI (3.14159) + + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The 1 / value. +/// +/// @ingroup GPU +FfxFloat32 ffxReciprocal(FfxFloat32 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The 1 / value. +/// +/// @ingroup GPU +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The 1 / value. +/// +/// @ingroup GPU +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) +{ + return rcp(value); +} + +/// Compute the reciprocal of value. +/// +/// @param [in] value The value to compute the reciprocal of. +/// +/// @returns +/// The 1 / value. +/// +/// @ingroup GPU +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) +{ + return rcp(value); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat32 ffxSqrt(FfxFloat32 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) +{ + return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat32 ffxIsSigned(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against for have the sign set. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. +/// +/// @ingroup GPU +FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) +{ + return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) +{ + return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) +{ + return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) +{ + return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Convert a 32bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] value The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) +{ + return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Convert a sortable integer to a 32bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] value The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) +{ + return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateSqrt(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a) +{ + return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(a)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) +{ + FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); + return b * (-b * value + FfxFloat32(2.0)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal square root for. +/// +/// @returns +/// An approximation of the reciprocal square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a) +{ + return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(a) >> FfxUInt32(1))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a) +{ + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(a)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(a)); + return b * (-b * a + ffxBroadcast2(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a) +{ + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast2(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a) +{ + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(a)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(a)); + return b * (-b * a + ffxBroadcast3(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a) +{ + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast3(1u))); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); +} + +/// Calculate a low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a) +{ + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(a)); +} + +/// Calculate a medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(a)); + return b * (-b * a + ffxBroadcast4(2.0f)); +} + +/// Calculate a low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] value The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a) +{ + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast4(1u))); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPU +FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPU +FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) +{ + return dot(a, b); +} + +/// Calculate dot product of 'a' and 'b'. +/// +/// @param [in] a First vector input. +/// @param [in] b Second vector input. +/// +/// @returns +/// The value of a dot b. +/// +/// @ingroup GPU +FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) +{ + return dot(a, b); +} + + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); + FfxFloat32 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) +{ + FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); + FfxFloat32 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); + FfxFloat32x2 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) +{ + FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); + FfxFloat32x2 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); + FfxFloat32x3 b4 = b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) +{ + FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); + FfxFloat32x3 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +/// Compute an approximate conversion from PQ to Gamma2 space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and Gamma2. +/// +/// @returns +/// The value a converted into Gamma2. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) +{ + return a * a * a * a; +} + +/// Compute an approximate conversion from PQ to linear space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between PQ and linear. +/// +/// @returns +/// The value a converted into linear. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) +{ + return a * a * a * a * a * a * a * a; +} + +/// Compute an approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); +} + +/// Compute a more accurate approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); + FfxFloat32x4 b4 = b * b * b * b * b * b * b * b; + return b - b * (b4 - a) / (FfxFloat32(4.0) * b4); +} + +/// Compute a high accuracy approximate conversion from gamma2 to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between gamma2 and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(a)); +} + +/// Compute an approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) +{ + return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); +} + +/// Compute a more accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) +{ + FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); + FfxFloat32x4 b8 = b * b * b * b * b * b * b * b; + return b - b * (b8 - a) / (FfxFloat32(8.0) * b8); +} + +/// Compute a very accurate approximate conversion from linear to PQ space. +/// +/// PQ is very close to x^(1/8). The functions below Use the fast FfxFloat32 approximation method to do +/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear +/// (8th power and fast 8th root). The maximum error is approximately 0.2%. +/// +/// @param a The value to convert between linear and PQ. +/// +/// @returns +/// The value a converted into PQ. +/// +/// @ingroup GPU +FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a) +{ + return ffxSqrt(ffxSqrt(ffxSqrt(a))); +} + +// An approximation of sine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate sine for. +// +// @returns +// The approximate sine of value. +FfxFloat32 ffxParabolicSin(FfxFloat32 value) +{ + return value * abs(value) - value; +} + +// An approximation of sine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate sine for. +// +// @returns +// The approximate sine of value. +FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x) +{ + return x * abs(x) - x; +} + +// An approximation of cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of value. +FfxFloat32 ffxParabolicCos(FfxFloat32 x) +{ + x = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + x = x * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(x); +} + +// An approximation of cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// The approximate cosine of value. +FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x) +{ + x = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f)); + x = x * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f); + return ffxParabolicSin(x); +} + +// An approximation of both sine and cosine. +// +// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +// is {-1/4 to 1/4} representing {-1 to 1}. +// +// @param [in] value The value to calculate approximate cosine for. +// +// @returns +// A FfxFloat32x2 containing approximations of both sine and cosine of value. +FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x) +{ + FfxFloat32 y = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75)); + y = y * FfxFloat32(2.0) - FfxFloat32(1.0); + return ffxParabolicSin(FfxFloat32x2(x, y)); +} + +/// Conditional free logic AND operation using values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) +{ + return x ^ FfxUInt32(1); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) +{ + return x ^ ffxBroadcast2(1u); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) +{ + return x ^ ffxBroadcast3(1u); +} + +/// Conditional free logic NOT operation using two values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) +{ + return x ^ ffxBroadcast4(1u); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) +{ + return max(x, y); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) +{ + return FfxUInt32(FfxFloat32(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) +{ + return FfxUInt32x2(ffxBroadcast2(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) +{ + return FfxUInt32x3(ffxBroadcast3(1.0) - x); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) +{ + return FfxUInt32x4(ffxBroadcast4(1.0) - x); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two values followed by a NOT operation +/// using the resulting value and a third value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) +{ + return FfxFloat32(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) +{ + return ffxBroadcast2(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) +{ + return ffxBroadcast3(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) +{ + return ffxBroadcast4(1.0) - x; +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) +{ + return max(x, y); +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + FfxFloat32 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + FfxFloat32x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + FfxFloat32x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + FfxFloat32x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) +{ + return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) +{ + return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) +{ + return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Given a value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) +{ + return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] color The color to convert to Rec. 709. +/// +/// @returns +/// The color in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099); + return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX) +{ + return pow(color, FfxFloat32(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX) +{ + return pow(color, ffxBroadcast2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGamma. +/// +/// @param [in] value The value to convert to gamma space from linear. +/// @param [in] power The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 color, FfxFloat32 rcpX) +{ + return pow(color, ffxBroadcast3(rcpX)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxPQToLinear(FfxFloat32 x) +{ + FfxFloat32 p = pow(x, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 x) +{ + FfxFloat32x2 p = pow(x, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); +} + +/// Compute a PQ value from a linear value. +/// +/// @param [in] value The value to convert to PQ from linear. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 x) +{ + FfxFloat32x3 p = pow(x, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); +} + +/// Compute a linear value from a SRGB value. +/// +/// @param [in] value The value to convert to linear from SRGB. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat32 ffxSrgbToLinear(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); +} + +/// Compute a linear value from a SRGB value. +/// +/// @param [in] value The value to convert to linear from SRGB. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); +} + +/// Compute a linear value from a SRGB value. +/// +/// @param [in] value The value to convert to linear from SRGB. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +{ + return pow(color, FfxFloat32(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast2(power)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +{ + return pow(color, ffxBroadcast3(power)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxLinearFromPQ(FfxFloat32 x) +{ + FfxFloat32 p = pow(x, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 x) +{ + FfxFloat32x2 p = pow(x, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); +} + +/// Compute a linear value from a value in a PQ space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in PQ space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x) +{ + FfxFloat32x3 p = pow(x, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] value The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPU +FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPU +FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) +{ + return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} diff --git a/Assets/Shaders/FSR2/ffx_core_gpu_common.h.meta b/Assets/Shaders/FSR2/ffx_core_gpu_common.h.meta new file mode 100644 index 0000000..2bc6ab3 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_gpu_common.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 8ca16c7c1627e754cb45dd6c91cd7b5b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h b/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h new file mode 100644 index 0000000..63105be --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h @@ -0,0 +1,2978 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if FFX_HALF +#if FFX_HLSL_6_2 +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPU +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPU +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u) +#else +/// A define value for 16bit positive infinity. +/// +/// @ingroup GPU +#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u) + +/// A define value for 16bit negative infinity. +/// +/// @ingroup GPU +#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u) +#endif // FFX_HLSL_6_2 + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Compute the min of two values. +/// +/// @param [in] x The first value to compute the min of. +/// @param [in] y The second value to compute the min of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Compute the max of two values. +/// +/// @param [in] x The first value to compute the max of. +/// @param [in] y The second value to compute the max of. +/// +/// @returns +/// The the lowest of two values. +/// +/// @ingroup GPU +FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return pow(x, y); +} + +/// Compute the value of the first parameter raised to the power of the second. +/// +/// @param [in] x The value to raise to the power y. +/// @param [in] y The power to which to raise x. +/// +/// @returns +/// The value of the first parameter raised to the power of the second. +/// +/// @ingroup GPU +FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return pow(x, y); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat16 ffxSqrt(FfxFloat16 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) +{ + return sqrt(x); +} + +/// Compute the square root of a value. +/// +/// @param [in] x The first value to compute the min of. +/// +/// @returns +/// The the square root of x. +/// +/// @ingroup GPU +FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) +{ + return sqrt(x); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) +{ + return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) +{ + return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) +{ + return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u))); +} + +/// Copy the sign bit from 's' to positive 'd'. +/// +/// @param [in] d The value to copy the sign bit into. +/// @param [in] s The value to copy the sign bit from. +/// +/// @returns +/// The value of d with the sign bit from s. +/// +/// @ingroup GPU +FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) +{ + return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u))); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 0 +/// m >= 0 := 0 +/// m < 0 := 1 +/// +/// Uses the following useful floating point logic, +/// saturate(+a*(-INF)==-INF) := 0 +/// saturate( 0*(-INF)== NaN) := 0 +/// saturate(-a*(-INF)==+INF) := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against 0. +/// +/// @returns +/// 1.0 when the value is negative, or 0.0 when the value is 0 or position. +/// +/// @ingroup GPU +FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); +} + +/// A single operation to return the following: +/// m = NaN := 1 +/// m > 0 := 0 +/// m <= 0 := 1 +/// +/// This function is useful when creating masks for branch-free logic. +/// +/// @param [in] m The value to test against zero. +/// +/// @returns +/// 1.0 when the value is position, or 0.0 when the value is 0 or negative. +/// +/// @ingroup GPU +FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) +{ + return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Convert a 16bit floating point value to sortable integer. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point value to make sortable. +/// +/// @returns +/// The sortable integer value. +/// +/// @ingroup GPU +FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a sortable integer to a 16bit floating point value. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer value to make floating point. +/// +/// @returns +/// The floating point value. +/// +/// @ingroup GPU +FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); +} + +/// Convert a pair of 16bit floating point values to a pair of sortable integers. +/// +/// - If sign bit=0, flip the sign bit (positives). +/// - If sign bit=1, flip all bits (negatives). +/// +/// The function has the side effects that: +/// - Larger integers are more positive values. +/// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). +/// +/// @param [in] x The floating point values to make sortable. +/// +/// @returns +/// The sortable integer values. +/// +/// @ingroup GPU +FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) +{ + return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Convert a pair of sortable integers to a pair of 16bit floating point values. +/// +/// The function has the side effects that: +/// - If sign bit=1, flip the sign bit (positives). +/// - If sign bit=0, flip all bits (negatives). +/// +/// @param [in] x The sortable integer values to make floating point. +/// +/// @returns +/// The floating point values. +/// +/// @ingroup GPU +FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) +{ + return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y0 [Zero] X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) +{ + return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y1 [Zero] X1 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y2 [Zero] X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// [Zero] Y3 [Zero] X3 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) +{ + return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 Y1 X2 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) +{ + return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X0 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 Y2 X2 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) +{ + return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X0 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y3 X2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X0 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// X2 Y2 Y1 Y0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) +{ + return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 X2 Y0 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) +{ + return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); +} + +/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer. +/// +/// The resulting integer will contain bytes in the following order, from most to least significant: +/// Y2 Y0 X2 X0 +/// +/// @param [in] i The integer pair to pack. +/// +/// @returns +/// The packed integer value. +/// +/// @ingroup GPU +FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) +{ + return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte0Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// r=ffxPermuteUByte1Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// r=ffxPermuteUByte2Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// r=ffxPermuteUByte3Float16x2ToUint2(d,i) +/// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits +/// Where 'k1' is an SGPR with 0x???? +/// Where 'k2' is an SGPR with 0x???? +/// V_PK_FMA_F16 i,i,k0.x,0 +/// V_PERM_B32 r.x,i,i,k1 +/// V_PERM_B32 r.y,i,i,k2 +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); +} + +/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}. +/// +/// @param [in] x The first float16x2 value to pack. +/// @param [in] y The second float16x2 value to pack. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) +{ + x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + y = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); + return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7], +/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15], +/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23], +/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31], +/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops. +/// +/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero). +/// This is useful if there is a desire for cleared values to decode as zero. +/// +/// Handles signed byte values. +/// +/// @param [in] d The FfxUInt32x2 value to be packed. +/// @param [in] i The FfxFloat16x2 value to be packed. +/// +/// @returns +/// The packed FfxUInt32x2 value. +/// +/// @ingroup GPU +FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) +{ + FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; + return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops. +/// +/// Handles signed byte values. +/// +/// @param [in] i The FfxUInt32x2 value to be unpacked. +/// +/// @returns +/// The unpacked FfxFloat16x2. +/// +/// @ingroup GPU +FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) +{ + return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the square root for. +/// +/// @returns +/// An approximation of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) +{ + return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) +{ + FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) +{ + FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X2(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) +{ + FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X3(2.0)); +} + +/// Calculate a half-precision medium-quality approximation for the reciprocal of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal for. +/// +/// @returns +/// An approximation of the reciprocal, estimated to medium quality. +/// +/// @ingroup GPU +FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) +{ + FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a)); + return b * (-b * a + FFX_BROADCAST_FLOAT16X4(2.0)); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) +{ + return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) +{ + return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) +{ + return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1))); +} + +/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value. +/// +/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent +/// presentation materials: +/// +/// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf +/// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h +/// +/// @param [in] a The value to calculate an approximate to the reciprocal of the square root for. +/// +/// @returns +/// An approximation of the reciprocal of the square root, estimated to low quality. +/// +/// @ingroup GPU +FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) +{ + return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of value. +FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x) +{ + return x * abs(x) - x; +} + +/// An approximation of sine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate sine for. +/// +/// @returns +/// The approximate sine of value. +FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x) +{ + return x * abs(x) - x; +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of value. +FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// The approximate cosine of value. +FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x) +{ + x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75)); + x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0); + return ffxParabolicSinHalf(x); +} + +/// An approximation of both sine and cosine. +/// +/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range +/// is {-1/4 to 1/4} representing {-1 to 1}. +/// +/// @param [in] x The value to calculate approximate cosine for. +/// +/// @returns +/// A FfxFloat32x2 containing approximations of both sine and cosine of value. +FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) +{ + FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); + y = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); + return ffxParabolicSinHalf(FfxFloat16x2(x, y)); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) +{ + return x ^ FFX_BROADCAST_UINT16(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) +{ + return x ^ FFX_BROADCAST_UINT16X2(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) +{ + return x ^ FFX_BROADCAST_UINT16X3(1); +} + +/// Conditional free logic NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the NOT operator. +/// @param [in] y The second value to be fed into the NOT operator. +/// +/// @returns +/// Result of the NOT operation. +/// +/// @ingroup GPU +FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) +{ + return x ^ FFX_BROADCAST_UINT16X4(1); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) +{ + return max(x, y); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPU +FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) +{ + return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPU +FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) +{ + return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPU +FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) +{ + return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); +} + +/// Convert a half-precision FfxFloat32 value between 0.0f and 1.0f to a half-precision Uint. +/// +/// @param [in] x The value to converted to a Uint. +/// +/// @returns +/// The converted Uint value. +/// +/// @ingroup GPU +FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) +{ + return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) +{ + return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) +{ + return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) +{ + return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Convert a half-precision FfxUInt32 value between 0 and 1 to a half-precision FfxFloat32. +/// +/// @param [in] x The value to converted to a half-precision FfxFloat32. +/// +/// @returns +/// The converted half-precision FfxFloat32 value. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) +{ + return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return min(x, y); +} + +/// Conditional free logic AND operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// +/// @returns +/// Result of the AND operation. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return min(x, y); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPU +FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPU +FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPU +FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0); +} + +/// Conditional free logic AND NOT operation using two half-precision values. +/// +/// @param [in] x The first value to be fed into the AND NOT operator. +/// @param [in] y The second value to be fed into the AND NOT operator. +/// +/// @returns +/// Result of the AND NOT operation. +/// +/// @ingroup GPU +FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return ffxSaturate(x * y + z); +} + +/// Conditional free logic AND operation using two half-precision values followed by +/// a NOT operation using the resulting value and a third half-precision value. +/// +/// @param [in] x The first value to be fed into the AND operator. +/// @param [in] y The second value to be fed into the AND operator. +/// @param [in] z The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return ffxSaturate(x * y + z); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the greater than zero comparison. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) +{ + return FFX_BROADCAST_FLOAT16(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) +{ + return FFX_BROADCAST_FLOAT16X2(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) +{ + return FFX_BROADCAST_FLOAT16X3(1.0) - x; +} + +/// Conditional free logic signed NOT operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the AND OR operator. +/// +/// @returns +/// Result of the AND OR operation. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) +{ + return FFX_BROADCAST_FLOAT16X4(1.0) - x; +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) +{ + return max(x, y); +} + +/// Conditional free logic OR operation using two half-precision FfxFloat32 values. +/// +/// @param [in] x The first value to be fed into the OR operator. +/// @param [in] y The second value to be fed into the OR operator. +/// +/// @returns +/// Result of the OR operation. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) +{ + return max(x, y); +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + FfxFloat16 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + FfxFloat16x2 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + FfxFloat16x3 r = (-x) * z + z; + return x * y + r; +} + +/// Choose between two half-precision FfxFloat32 values if the first paramter is greater than zero. +/// +/// @param [in] x The value to compare against zero. +/// @param [in] y The value to return if the comparision is greater than zero. +/// @param [in] z The value to return if the comparision is less than or equal to zero. +/// +/// @returns +/// The selected value. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + FfxFloat16x4 r = (-x) * z + z; + return x * y + r; +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not. +/// +/// @param [in] x The value to be compared. +/// +/// @returns +/// Result of the sign value. +/// +/// @ingroup GPU +FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) +{ + return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPU +FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute a Rec.709 color space. +/// +/// Rec.709 is used for some HDTVs. +/// +/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. +/// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). +/// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). +/// +/// @param [in] c The color to convert to Rec. 709. +/// +/// @returns +/// The color in Rec.709 space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); +} + +/// Compute a gamma value from a linear value. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in ffxLinearFromGammaHalf. +/// +/// @param [in] c The value to convert to gamma space from linear. +/// @param [in] rcpX The reciprocal of power value used for the gamma curve. +/// +/// @returns +/// A value in gamma space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.x, c * j.y, pow(c, j.z) * k.x + k.y); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xx, c * j.yy, pow(c, j.zz) * k.xx + k.yy); +} + +/// Compute an SRGB value from a linear value. +/// +/// @param [in] c The value to convert to SRGB from linear. +/// +/// @returns +/// A value in SRGB space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055); + return clamp(j.xxx, c * j.yyy, pow(c, j.zzz) * k.xxx + k.yyy); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPU +FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPU +FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) +{ + return sqrt(c); +} + +/// Compute the square root of a value. +/// +/// @param [in] c The value to compute the square root for. +/// +/// @returns +/// A square root of the input value. +/// +/// @ingroup GPU +FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) +{ + return sqrt(c); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPU +FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPU +FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); +} + +/// Compute the cube root of a value. +/// +/// @param [in] c The value to compute the cube root for. +/// +/// @returns +/// A cube root of the input value. +/// +/// @ingroup GPU +FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] c The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X2(x)); +} + +/// Compute a linear value from a value in a gamma space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in gamma space. +/// @param [in] x The power value used for the gamma curve. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) +{ + return pow(c, FFX_BROADCAST_FLOAT16X3(x)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a value in a SRGB space. +/// +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// +/// @param [in] c The value to convert to linear in SRGB space. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPU +FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) +{ + FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); + return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); +} + +/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. +/// +/// 543210 +/// ====== +/// ..xxx. +/// yy...y +/// +/// @param [in] a The input 1D coordinates to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPU +FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); +} + +/// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. +/// +/// The 64-wide lane indices to 8x8 remapping is performed as follows: +/// +/// 00 01 08 09 10 11 18 19 +/// 02 03 0a 0b 12 13 1a 1b +/// 04 05 0c 0d 14 15 1c 1d +/// 06 07 0e 0f 16 17 1e 1f +/// 20 21 28 29 30 31 38 39 +/// 22 23 2a 2b 32 33 3a 3b +/// 24 25 2c 2d 34 35 3c 3d +/// 26 27 2e 2f 36 37 3e 3f +/// +/// @param [in] a The input 1D coordinate to remap. +/// +/// @returns +/// The remapped 2D coordinates. +/// +/// @ingroup GPU +FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) +{ + return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); +} + +#endif // FFX_HALF diff --git a/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h.meta b/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h.meta new file mode 100644 index 0000000..a31ee9b --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_gpu_common_half.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 3df3a1ba6be641845a6139e19a3f6000 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core_hlsl.h b/Assets/Shaders/FSR2/ffx_core_hlsl.h new file mode 100644 index 0000000..e1db5e3 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_hlsl.h @@ -0,0 +1,1527 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// A define for abstracting shared memory between shading languages. +/// +/// @ingroup GPU +#define FFX_GROUPSHARED groupshared + +/// A define for abstracting compute memory barriers between shading languages. +/// +/// @ingroup GPU +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync + +/// A define added to accept static markup on functions to aid CPU/GPU portability of code. +/// +/// @ingroup GPU +#define FFX_STATIC static + +/// A define for abstracting loop unrolling between shading languages. +/// +/// @ingroup GPU +#define FFX_UNROLL [unroll] + +/// A define for abstracting a 'greater than' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_GREATER_THAN(x, y) x > y + +/// A define for abstracting a 'greater than or equal' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_GREATER_THAN_EQUAL(x, y) x >= y + +/// A define for abstracting a 'less than' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_LESS_THAN(x, y) x < y + +/// A define for abstracting a 'less than or equal' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_LESS_THAN_EQUAL(x, y) x <= y + +/// A define for abstracting an 'equal' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_EQUAL(x, y) x == y + +/// A define for abstracting a 'not equal' comparison operator between two types. +/// +/// @ingroup GPU +#define FFX_NOT_EQUAL(x, y) x != y + +/// Broadcast a scalar value to a 1-dimensional floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) + +/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_UINT32(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) + +/// Broadcast a scalar value to a 1-dimensional signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_INT32(x) FfxInt32(x) + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_INT32X2(x) FfxInt32(x) + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_INT32X3(x) FfxInt32(x) + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_INT32X4(x) FfxInt32(x) + +/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) + +/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) + +/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. +/// +/// @ingroup GPU +#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup HLSL +FfxUInt32 packHalf2x16(FfxFloat32x2 value) +{ + return f32tof16(value.x) | (f32tof16(value.y) << 16); +} + +/// Broadcast a scalar value to a 2-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) +{ + return FfxFloat32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) +{ + return FfxFloat32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional floating point vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional floating point vector with value in each component. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) +{ + return FfxFloat32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSL +FfxInt32x2 ffxBroadcast2(FfxInt32 value) +{ + return FfxInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSL +FfxUInt32x3 ffxBroadcast3(FfxInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional signed integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional signed integer vector with value in each component. +/// +/// @ingroup HLSL +FfxInt32x4 ffxBroadcast4(FfxInt32 value) +{ + return FfxInt32x4(value, value, value, value); +} + +/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 2-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSL +FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) +{ + return FfxUInt32x2(value, value); +} + +/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 3-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSL +FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) +{ + return FfxUInt32x3(value, value, value); +} + +/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. +/// +/// @param [in] value The value to to broadcast. +/// +/// @returns +/// A 4-dimensional unsigned integer vector with value in each component. +/// +/// @ingroup HLSL +FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) +{ + return FfxUInt32x4(value, value, value, value); +} + +FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (src >> off) & mask; +} + +FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +{ + return (ins & mask) | (src & (~mask)); +} + +FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +{ + FfxUInt32 mask = (1u << bits) - 1; + return (ins & mask) | (src & (~mask)); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSL +FfxUInt32 ffxAsUInt32(FfxFloat32 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSL +FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSL +FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as an unsigned integer. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as an unsigned integer. +/// +/// @ingroup HLSL +FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) +{ + return asuint(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSL +FfxFloat32 ffxAsFloat(FfxUInt32 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) +{ + return asfloat(x); +} + +/// Interprets the bit pattern of x as a floating-point number. +/// +/// @param [in] value The input value. +/// +/// @returns +/// The input interpreted as a floating-point number. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) +{ + return asfloat(x); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) +{ + return lerp(x, y, t); +} + +/// Compute the linear interopation between two values. +/// +/// Implemented by calling the HLSL mix instrinsic function. Implements the +/// following math: +/// +/// (1 - t) * x + t * y +/// +/// @param [in] x The first value to lerp between. +/// @param [in] y The second value to lerp between. +/// @param [in] t The value to determine how much of x and how much of y. +/// +/// @returns +/// A linearly interpolated value between x and y according to t. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) +{ + return lerp(x, y, t); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSL +FfxFloat32 ffxSaturate(FfxFloat32 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) +{ + return saturate(x); +} + +/// Clamp a value to a [0..1] range. +/// +/// @param [in] x The value to clamp to [0..1] range. +/// +/// @returns +/// The clamped version of x. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) +{ + return saturate(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSL +FfxFloat32 ffxFract(FfxFloat32 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxFract(FfxFloat32x2 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxFract(FfxFloat32x3 x) +{ + return x - floor(x); +} + +/// Compute the factional part of a decimal value. +/// +/// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// function. +/// +/// @param [in] x The value to compute the fractional part from. +/// +/// @returns +/// The fractional part of x. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxFract(FfxFloat32x4 x) +{ + return x - floor(x); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return max(x, max(y, z)); +} + +/// Compute the maximum of three values. +/// +/// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the max calculation. +/// @param [in] y The second value to include in the max calcuation. +/// @param [in] z The third value to include in the max calcuation. +/// +/// @returns +/// The maximum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return max(x, max(y, z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the median of three values. +/// +/// NOTE: This function should compile down to a single V_MED3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the median calculation. +/// @param [in] y The second value to include in the median calcuation. +/// @param [in] z The third value to include in the median calcuation. +/// +/// @returns +/// The median value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) +{ + return min(x, min(y, z)); +} + +/// Compute the minimum of three values. +/// +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// +/// @param [in] x The first value to include in the min calculation. +/// @param [in] y The second value to include in the min calcuation. +/// @param [in] z The third value to include in the min calcuation. +/// +/// @returns +/// The minimum value of x, y, and z. +/// +/// @ingroup HLSL +FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) +{ + return min(x, min(y, z)); +} + + +FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +{ + return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); +} + +//============================================================================================================================== +// HLSL HALF +//============================================================================================================================== +#if FFX_HALF + +//============================================================================================================================== +// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). +// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ +FfxFloat16x2 ffxUint32ToFloat16x2(FfxUInt32 x) +{ + FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); + return FfxFloat16x2(t); +} +FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) +{ + return FfxFloat16x4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); +} +FfxUInt16x2 ffxUint32ToUint16x2(FfxUInt32 x) +{ + FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); + return FfxUInt16x2(t); +} +FfxUInt16x4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) +{ + return FfxUInt16x4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); +} +#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) +#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) +#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt32 ffxFloat16x2ToUint32(FfxFloat16x2 x) +{ + return f32tof16(x.x) + (f32tof16(x.y) << 16); +} +FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x) +{ + return FfxUInt32x2(ffxFloat16x2ToUint32(x.xy), ffxFloat16x2ToUint32(x.zw)); +} +FfxUInt32 ffxUint16x2ToUint32(FfxUInt16x2 x) +{ + return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); +} +FfxUInt32x2 ffxUint16x4ToUint32x2(FfxUInt16x4 x) +{ + return FfxUInt32x2(ffxUint16x2ToUint32(x.xy), ffxUint16x2ToUint32(x.zw)); +} +#define FFX_FLOAT16X2_TO_UINT32(x) ffxFloat16x2ToUint32(FfxFloat16x2(x)) +#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x)) +#define FFX_UINT16X2_TO_UINT32(x) ffxUint16x2ToUint32(FfxUInt16x2(x)) +#define FFX_UINT16X4_TO_UINT32X2(x) ffxUint16x4ToUint32x2(FfxUInt16x4(x)) + +#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) + #define FFX_TO_UINT16(x) asuint16(x) + #define FFX_TO_UINT16X2(x) asuint16(x) + #define FFX_TO_UINT16X3(x) asuint16(x) + #define FFX_TO_UINT16X4(x) asuint16(x) +#else + #define FFX_TO_UINT16(a) FfxUInt16(f32tof16(FfxFloat32(a))) + #define FFX_TO_UINT16X2(a) FfxUInt16x2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) + #define FFX_TO_UINT16X3(a) FfxUInt16x3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) + #define FFX_TO_UINT16X4(a) FfxUInt16x4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) +#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) + +#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) + #define FFX_TO_FLOAT16(x) asfloat16(x) + #define FFX_TO_FLOAT16X2(x) asfloat16(x) + #define FFX_TO_FLOAT16X3(x) asfloat16(x) + #define FFX_TO_FLOAT16X4(x) asfloat16(x) +#else + #define FFX_TO_FLOAT16(a) FfxFloat16(f16tof32(FfxUInt32(a))) + #define FFX_TO_FLOAT16X2(a) FfxFloat16x2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) + #define FFX_TO_FLOAT16X3(a) FfxFloat16x3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) + #define FFX_TO_FLOAT16X4(a) FfxFloat16x4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) +#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) + +//============================================================================================================================== +#if FFX_HLSL_6_2 +FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a) +{ + return FfxFloat16(a); +} +FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a) +{ + return FfxFloat16x2(a, a); +} +FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a) +{ + return FfxFloat16x3(a, a, a); +} +FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a) +{ + return FfxFloat16x4(a, a, a, a); +} +#define FFX_BROADCAST_FLOAT16(a) FfxFloat16(a) +#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16(a) +#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16(a) +#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16(a) +#else +#define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a) +#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a) +#endif +//------------------------------------------------------------------------------------------------------------------------------ +#if FFX_HLSL_6_2 +FfxInt16 ffxBroadcastInt16(FfxInt16 a) +{ + return FfxInt16(a); +} +FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a) +{ + return FfxInt16x2(a, a); +} +FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a) +{ + return FfxInt16x3(a, a, a); +} +FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a) +{ + return FfxInt16x4(a, a, a, a); +} +#define FFX_BROADCAST_INT16(a) FfxInt16(a) +#define FFX_BROADCAST_INT16X2(a) FfxInt16(a) +#define FFX_BROADCAST_INT16X3(a) FfxInt16(a) +#define FFX_BROADCAST_INT16X4(a) FfxInt16(a) +#else +#define FFX_BROADCAST_INT16(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a) +#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a) +#endif +//------------------------------------------------------------------------------------------------------------------------------ +#if FFX_HLSL_6_2 +FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a) +{ + return FfxUInt16(a); +} +FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a) +{ + return FfxUInt16x2(a, a); +} +FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a) +{ + return FfxUInt16x3(a, a, a); +} +FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a) +{ + return FfxUInt16x4(a, a, a, a); +} +#define FFX_BROADCAST_UINT16(a) FfxUInt16(a) +#define FFX_BROADCAST_UINT16X2(a) FfxUInt16(a) +#define FFX_BROADCAST_UINT16X3(a) FfxUInt16(a) +#define FFX_BROADCAST_UINT16X4(a) FfxUInt16(a) +#else +#define FFX_BROADCAST_UINT16(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a) +#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a) +#endif +//============================================================================================================================== +FfxUInt16 ffxAbsHalf(FfxUInt16 a) +{ + return FfxUInt16(abs(FfxInt16(a))); +} +FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a) +{ + return FfxUInt16x2(abs(FfxInt16x2(a))); +} +FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a) +{ + return FfxUInt16x3(abs(FfxInt16x3(a))); +} +FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a) +{ + return FfxUInt16x4(abs(FfxInt16x4(a))); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m) +{ + return max(n, min(x, m)); +} +FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m) +{ + return max(n, min(x, m)); +} +FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m) +{ + return max(n, min(x, m)); +} +FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m) +{ + return max(n, min(x, m)); +} +//------------------------------------------------------------------------------------------------------------------------------ +// V_FRACT_F16 (note DX frac() is different). +FfxFloat16 ffxFract(FfxFloat16 x) +{ + return x - floor(x); +} +FfxFloat16x2 ffxFract(FfxFloat16x2 x) +{ + return x - floor(x); +} +FfxFloat16x3 ffxFract(FfxFloat16x3 x) +{ + return x - floor(x); +} +FfxFloat16x4 ffxFract(FfxFloat16x4 x) +{ + return x - floor(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a) +{ + return lerp(x, y, a); +} +FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a) +{ + return lerp(x, y, a); +} +FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a) +{ + return lerp(x, y, a); +} +FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a) +{ + return lerp(x, y, a); +} +FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a) +{ + return lerp(x, y, a); +} +FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a) +{ + return lerp(x, y, a); +} +FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a) +{ + return lerp(x, y, a); +} +//------------------------------------------------------------------------------------------------------------------------------ +#if FFX_HLSL_6_2 +FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F t) +{ + return lerp(x, y, t); +} +FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 t) +{ + return lerp(x, y, t); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMin(FFX_MIN16_F x, FFX_MIN16_F y) +{ + return min(x, y); +} +FFX_MIN16_F2 ffxMin(FFX_MIN16_F2 x, FFX_MIN16_F2 y) +{ + return min(x, y); +} +FFX_MIN16_F3 ffxMin(FFX_MIN16_F3 x, FFX_MIN16_F3 y) +{ + return min(x, y); +} +FFX_MIN16_F4 ffxMin(FFX_MIN16_F4 x, FFX_MIN16_F4 y) +{ + return min(x, y); +} +//------------------------------------------------------------------------------------------------------------------------------ +FFX_MIN16_F ffxMax(FFX_MIN16_F x, FFX_MIN16_F y) +{ + return max(x, y); +} +FFX_MIN16_F2 ffxMax(FFX_MIN16_F2 x, FFX_MIN16_F2 y) +{ + return max(x, y); +} +FFX_MIN16_F3 ffxMax(FFX_MIN16_F3 x, FFX_MIN16_F3 y) +{ + return max(x, y); +} +FFX_MIN16_F4 ffxMax(FFX_MIN16_F4 x, FFX_MIN16_F4 y) +{ + return max(x, y); +} +#endif +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(x, max(y, z)); +} +FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(x, max(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return min(x, min(y, z)); +} +FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return min(x, min(y, z)); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxReciprocalHalf(FfxFloat16 x) +{ + return rcp(x); +} +FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x) +{ + return rcp(x); +} +FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x) +{ + return rcp(x); +} +FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x) +{ + return rcp(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x) +{ + return rsqrt(x); +} +FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x) +{ + return rsqrt(x); +} +FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x) +{ + return rsqrt(x); +} +FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x) +{ + return rsqrt(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxSaturate(FfxFloat16 x) +{ + return saturate(x); +} +FfxFloat16x2 ffxSaturate(FfxFloat16x2 x) +{ + return saturate(x); +} +FfxFloat16x3 ffxSaturate(FfxFloat16x3 x) +{ + return saturate(x); +} +FfxFloat16x4 ffxSaturate(FfxFloat16x4 x) +{ + return saturate(x); +} +//------------------------------------------------------------------------------------------------------------------------------ +FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b) +{ + return FfxUInt16(FfxInt16(a) >> FfxInt16(b)); +} +FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b) +{ + return FfxUInt16x2(FfxInt16x2(a) >> FfxInt16x2(b)); +} +FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b) +{ + return FfxUInt16x3(FfxInt16x3(a) >> FfxInt16x3(b)); +} +FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b) +{ + return FfxUInt16x4(FfxInt16x4(a) >> FfxInt16x4(b)); +} +#endif // FFX_HALF + +//============================================================================================================================== +// HLSL WAVE +//============================================================================================================================== +#if defined(FFX_WAVE) +// Where 'x' must be a compile time literal. +FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} +FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); +} + +#if FFX_HALF +FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) +{ + return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) +{ + return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); +} +FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) +{ + return AW4_FFX_UINT32(WaveReadLaneAt(FFX_UINT32_AW4(v), WaveGetLaneIndex() ^ x)); +} +#endif // FFX_HALF +#endif // #if defined(FFX_WAVE) diff --git a/Assets/Shaders/FSR2/ffx_core_hlsl.h.meta b/Assets/Shaders/FSR2/ffx_core_hlsl.h.meta new file mode 100644 index 0000000..8c6204a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_hlsl.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 30b6dcd5041405a468ec871a19b8fee4 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_core_portability.h b/Assets/Shaders/FSR2/ffx_core_portability.h new file mode 100644 index 0000000..f0d3fd7 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_portability.h @@ -0,0 +1,50 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a + ffxBroadcast3(b); + return d; +} + +FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = a; + return d; +} + +FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +{ + d = a * b; + return d; +} + +FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +{ + d = a * ffxBroadcast3(b); + return d; +} + +FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +{ + d = rcp(a); + return d; +} diff --git a/Assets/Shaders/FSR2/ffx_core_portability.h.meta b/Assets/Shaders/FSR2/ffx_core_portability.h.meta new file mode 100644 index 0000000..fe8114a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_core_portability.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 6370ecd01d478c543865dc098810590c +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr1.h b/Assets/Shaders/FSR2/ffx_fsr1.h new file mode 100644 index 0000000..0636247 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr1.h @@ -0,0 +1,1246 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +/// Setup required constant values for EASU (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The rendered image resolution being upscaled in X dimension. +/// @param [in] inputViewportInPixelsY The rendered image resolution being upscaled in Y dimension. +/// @param [in] inputSizeInPixelsX The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension. +/// @param [in] inputSizeInPixelsY The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension. +/// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. +/// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. +/// +/// @ingroup FSR1 +FFX_STATIC void ffxFsrPopulateEasuConstants( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY) +{ + // Output integer position to a pixel position in viewport. + con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX)); + con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY)); + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5)); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5)); + + // Viewport pixel position to normalized image space. + // This is used to get upper-left of 'F' tap. + con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX)); + con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY)); + + // Centers of gather4, first offset from upper-left of 'F'. + // +---+---+ + // | | | + // +--(0)--+ + // | b | c | + // +---F---+---+---+ + // | e | f | g | h | + // +--(1)--+--(2)--+ + // | i | j | k | l | + // +---+---+---+---+ + // | n | o | + // +--(3)--+ + // | | | + // +---+---+ + con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY)); + + // These are from (0) instead of 'F'. + con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX)); + con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX)); + con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY)); + con3[2] = con3[3] = 0; +} + +/// Setup required constant values for EASU (works on CPU or GPU). +/// +/// @param [out] con0 +/// @param [out] con1 +/// @param [out] con2 +/// @param [out] con3 +/// @param [in] inputViewportInPixelsX The resolution of the input in the X dimension. +/// @param [in] inputViewportInPixelsY The resolution of the input in the Y dimension. +/// @param [in] inputSizeInPixelsX The input size in pixels in the X dimension. +/// @param [in] inputSizeInPixelsY The input size in pixels in the Y dimension. +/// @param [in] outputSizeInPixelsX The output size in pixels in the X dimension. +/// @param [in] outputSizeInPixelsY The output size in pixels in the Y dimension. +/// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution). +/// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). +/// +/// @ingroup FSR1 +FFX_STATIC void ffxFsrPopulateEasuConstantsOffset( + FFX_PARAMETER_INOUT FfxUInt32x4 con0, + FFX_PARAMETER_INOUT FfxUInt32x4 con1, + FFX_PARAMETER_INOUT FfxUInt32x4 con2, + FFX_PARAMETER_INOUT FfxUInt32x4 con3, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX, + FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX, + FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY) +{ + ffxFsrPopulateEasuConstants( + con0, + con1, + con2, + con3, + inputViewportInPixelsX, + inputViewportInPixelsY, + inputSizeInPixelsX, + inputSizeInPixelsY, + outputSizeInPixelsX, + outputSizeInPixelsY); + + // override + con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX); + con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY); +} + +#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p); +FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p); + +// Filtering for a given tap for the scalar. +void fsrEasuTapFloat( + FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor, // Accumulated color, with negative lobe. + FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight, // Accumulated weight. + FFX_PARAMETER_IN FfxFloat32x2 pixelOffset, // Pixel offset from resolve position to tap. + FFX_PARAMETER_IN FfxFloat32x2 gradientDirection, // Gradient direction. + FFX_PARAMETER_IN FfxFloat32x2 length, // Length. + FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength, // Negative lobe strength. + FFX_PARAMETER_IN FfxFloat32 clippingPoint, // Clipping point. + FFX_PARAMETER_IN FfxFloat32x3 color) // Tap color. +{ + // Rotate offset by direction. + FfxFloat32x2 rotatedOffset; + rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y); + rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x); + + // Anisotropy. + rotatedOffset *= length; + + // Compute distance^2. + FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y; + + // Limit to the window as at corner, 2 taps can easily be outside. + distanceSquared = ffxMin(distanceSquared, clippingPoint); + + // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. + // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 + // |_______________________________________| |_______________| + // base window + // The general form of the 'base' is, + // (a*(b*x^2-1)^2-(a-1)) + // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. + FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0); + FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0); + weightB *= weightB; + weightA *= weightA; + weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0)); + FfxFloat32 weight = weightB * weightA; + + // Do weighted average. + accumulatedColor += color * weight; + accumulatedWeight += weight; +} + +// Accumulate direction and length. +void fsrEasuSetFloat( + FFX_PARAMETER_INOUT FfxFloat32x2 direction, + FFX_PARAMETER_INOUT FfxFloat32 length, + FFX_PARAMETER_IN FfxFloat32x2 pp, + FFX_PARAMETER_IN FfxBoolean biS, + FFX_PARAMETER_IN FfxBoolean biT, + FFX_PARAMETER_IN FfxBoolean biU, + FFX_PARAMETER_IN FfxBoolean biV, + FFX_PARAMETER_IN FfxFloat32 lA, + FFX_PARAMETER_IN FfxFloat32 lB, + FFX_PARAMETER_IN FfxFloat32 lC, + FFX_PARAMETER_IN FfxFloat32 lD, + FFX_PARAMETER_IN FfxFloat32 lE) +{ + // Compute bilinear weight, branches factor out as predicates are compiler time immediates. + // s t + // u v + FfxFloat32 weight = FfxFloat32(0.0); + if (biS) + weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y); + if (biT) + weight = pp.x * (FfxFloat32(1.0) - pp.y); + if (biU) + weight = (FfxFloat32(1.0) - pp.x) * pp.y; + if (biV) + weight = pp.x * pp.y; + + // Direction is the '+' diff. + // a + // b c d + // e + // Then takes magnitude from abs average of both sides of 'c'. + // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. + FfxFloat32 dc = lD - lC; + FfxFloat32 cb = lC - lB; + FfxFloat32 lengthX = max(abs(dc), abs(cb)); + lengthX = ffxApproximateReciprocal(lengthX); + FfxFloat32 directionX = lD - lB; + direction.x += directionX * weight; + lengthX = ffxSaturate(abs(directionX) * lengthX); + lengthX *= lengthX; + length += lengthX * weight; + + // Repeat for the y axis. + FfxFloat32 ec = lE - lC; + FfxFloat32 ca = lC - lA; + FfxFloat32 lengthY = max(abs(ec), abs(ca)); + lengthY = ffxApproximateReciprocal(lengthY); + FfxFloat32 directionY = lE - lA; + direction.y += directionY * weight; + lengthY = ffxSaturate(abs(directionY) * lengthY); + lengthY *= lengthY; + length += lengthY * weight; +} + +/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations. +/// +/// @param [out] outPixel The computed color of a pixel. +/// @param [in] integerPosition Integer pixel position within the output. +/// @param [in] con0 The first constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con1 The second constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con2 The third constant value generated by ffxFsrPopulateEasuConstants. +/// @param [in] con3 The fourth constant value generated by ffxFsrPopulateEasuConstants. +/// +/// @ingroup FSR +void ffxFsrEasuFloat( + FFX_PARAMETER_OUT FfxFloat32x3 pix, + FFX_PARAMETER_IN FfxUInt32x2 ip, + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + // Get position of 'f'. + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + + // 12-tap kernel. + // b c + // e f g h + // i j k l + // n o + // Gather 4 ordering. + // a b + // r g + // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, + // a b <- unused (z) + // r g + // a b a b + // r g r g + // a b + // r g <- unused (z) + // Allowing dead-code removal to remove the 'z's. + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + + // These are from p0 to avoid pulling two constants on pre-Navi hardware. + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat32x4 bczzR = FsrEasuRF(p0); + FfxFloat32x4 bczzG = FsrEasuGF(p0); + FfxFloat32x4 bczzB = FsrEasuBF(p0); + FfxFloat32x4 ijfeR = FsrEasuRF(p1); + FfxFloat32x4 ijfeG = FsrEasuGF(p1); + FfxFloat32x4 ijfeB = FsrEasuBF(p1); + FfxFloat32x4 klhgR = FsrEasuRF(p2); + FfxFloat32x4 klhgG = FsrEasuGF(p2); + FfxFloat32x4 klhgB = FsrEasuBF(p2); + FfxFloat32x4 zzonR = FsrEasuRF(p3); + FfxFloat32x4 zzonG = FsrEasuGF(p3); + FfxFloat32x4 zzonB = FsrEasuBF(p3); + + // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). + FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG); + FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG); + FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG); + FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG); + + // Rename. + FfxFloat32 bL = bczzL.x; + FfxFloat32 cL = bczzL.y; + FfxFloat32 iL = ijfeL.x; + FfxFloat32 jL = ijfeL.y; + FfxFloat32 fL = ijfeL.z; + FfxFloat32 eL = ijfeL.w; + FfxFloat32 kL = klhgL.x; + FfxFloat32 lL = klhgL.y; + FfxFloat32 hL = klhgL.z; + FfxFloat32 gL = klhgL.w; + FfxFloat32 oL = zzonL.z; + FfxFloat32 nL = zzonL.w; + + // Accumulate for bilinear interpolation. + FfxFloat32x2 dir = ffxBroadcast2(0.0); + FfxFloat32 len = FfxFloat32(0.0); + fsrEasuSetFloat(dir, len, pp, FFX_TRUE, FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE, FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE, FFX_FALSE, fL, iL, jL, kL, nL); + fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE, gL, jL, kL, lL, oL); + + // Normalize with approximation, and cleanup close to zero. + FfxFloat32x2 dir2 = dir * dir; + FfxFloat32 dirR = dir2.x + dir2.y; + FfxUInt32 zro = dirR < FfxFloat32(1.0 / 32768.0); + dirR = ffxApproximateReciprocalSquareRoot(dirR); + dirR = zro ? FfxFloat32(1.0) : dirR; + dir.x = zro ? FfxFloat32(1.0) : dir.x; + dir *= ffxBroadcast2(dirR); + + // Transform from {0 to 2} to {0 to 1} range, and shape with square. + len = len * FfxFloat32(0.5); + len *= len; + + // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}. + FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y))); + + // Anisotropic length after rotation, + // x := 1.0 lerp to 'stretch' on edges + // y := 1.0 lerp to 2x on edges + FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len); + + // Based on the amount of 'edge', + // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}. + FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len; + + // Set distance^2 clipping point to the end of the adjustable window. + FfxFloat32 clp = ffxApproximateReciprocal(lob); + + // Accumulation mixed with min/max of 4 nearest. + // b c + // e f g h + // i j k l + // n o + FfxFloat32x3 min4 = + ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), + FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + FfxFloat32x3 max4 = + max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); + + // Accumulation. + FfxFloat32x3 aC = ffxBroadcast3(0.0); + FfxFloat32 aW = FfxFloat32(0.0); + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x)); // b + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y)); // c + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x)); // i + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)); // j + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z)); // f + fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w)); // e + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x)); // k + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y)); // l + fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z)); // h + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w)); // g + fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z)); // o + fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n + + // Normalize and dering. + pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) + +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF) +// Input callback prototypes, need to be implemented by calling shader +FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p); +FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p); + +// This runs 2 taps in parallel. +void FsrEasuTapH( + FFX_PARAMETER_INOUT FfxFloat16x2 aCR, + FFX_PARAMETER_INOUT FfxFloat16x2 aCG, + FFX_PARAMETER_INOUT FfxFloat16x2 aCB, + FFX_PARAMETER_INOUT FfxFloat16x2 aW, + FFX_PARAMETER_IN FfxFloat16x2 offX, + FFX_PARAMETER_IN FfxFloat16x2 offY, + FFX_PARAMETER_IN FfxFloat16x2 dir, + FFX_PARAMETER_IN FfxFloat16x2 len, + FFX_PARAMETER_IN FfxFloat16 lob, + FFX_PARAMETER_IN FfxFloat16 clp, + FFX_PARAMETER_IN FfxFloat16x2 cR, + FFX_PARAMETER_IN FfxFloat16x2 cG, + FFX_PARAMETER_IN FfxFloat16x2 cB) +{ + FfxFloat16x2 vX, vY; + vX = offX * dir.xx + offY * dir.yy; + vY = offX * (-dir.yy) + offY * dir.xx; + vX *= len.x; + vY *= len.y; + FfxFloat16x2 d2 = vX * vX + vY * vY; + d2 = min(d2, FFX_BROADCAST_FLOAT16X2(clp)); + FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0); + wB *= wB; + wA *= wA; + wB = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0)); + FfxFloat16x2 w = wB * wA; + aCR += cR * w; + aCG += cG * w; + aCB += cB * w; + aW += w; +} + +// This runs 2 taps in parallel. +void FsrEasuSetH( + FFX_PARAMETER_INOUT FfxFloat16x2 dirPX, + FFX_PARAMETER_INOUT FfxFloat16x2 dirPY, + FFX_PARAMETER_INOUT FfxFloat16x2 lenP, + FFX_PARAMETER_IN FfxFloat16x2 pp, + FFX_PARAMETER_IN FfxBoolean biST, + FFX_PARAMETER_IN FfxBoolean biUV, + FFX_PARAMETER_IN FfxFloat16x2 lA, + FFX_PARAMETER_IN FfxFloat16x2 lB, + FFX_PARAMETER_IN FfxFloat16x2 lC, + FFX_PARAMETER_IN FfxFloat16x2 lD, + FFX_PARAMETER_IN FfxFloat16x2 lE) +{ + FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0); + + if (biST) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y); + + if (biUV) + w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y); + + // ABS is not free in the packed FP16 path. + FfxFloat16x2 dc = lD - lC; + FfxFloat16x2 cb = lC - lB; + FfxFloat16x2 lenX = max(abs(dc), abs(cb)); + lenX = ffxReciprocalHalf(lenX); + + FfxFloat16x2 dirX = lD - lB; + dirPX += dirX * w; + lenX = ffxSaturate(abs(dirX) * lenX); + lenX *= lenX; + lenP += lenX * w; + FfxFloat16x2 ec = lE - lC; + FfxFloat16x2 ca = lC - lA; + FfxFloat16x2 lenY = max(abs(ec), abs(ca)); + lenY = ffxReciprocalHalf(lenY); + FfxFloat16x2 dirY = lE - lA; + dirPY += dirY * w; + lenY = ffxSaturate(abs(dirY) * lenY); + lenY *= lenY; + lenP += lenY * w; +} + +void FsrEasuH( + FFX_PARAMETER_OUT FfxFloat16x3 pix, + FFX_PARAMETER_IN FfxUInt32x2 ip, + FFX_PARAMETER_IN FfxUInt32x4 con0, + FFX_PARAMETER_IN FfxUInt32x4 con1, + FFX_PARAMETER_IN FfxUInt32x4 con2, + FFX_PARAMETER_IN FfxUInt32x4 con3) +{ + FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw); + FfxFloat32x2 fp = floor(pp); + pp -= fp; + FfxFloat16x2 ppp = FfxFloat16x2(pp); + + FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw); + FfxFloat32x2 p1 = p0 + ffxAsFloat(con2.xy); + FfxFloat32x2 p2 = p0 + ffxAsFloat(con2.zw); + FfxFloat32x2 p3 = p0 + ffxAsFloat(con3.xy); + FfxFloat16x4 bczzR = FsrEasuRH(p0); + FfxFloat16x4 bczzG = FsrEasuGH(p0); + FfxFloat16x4 bczzB = FsrEasuBH(p0); + FfxFloat16x4 ijfeR = FsrEasuRH(p1); + FfxFloat16x4 ijfeG = FsrEasuGH(p1); + FfxFloat16x4 ijfeB = FsrEasuBH(p1); + FfxFloat16x4 klhgR = FsrEasuRH(p2); + FfxFloat16x4 klhgG = FsrEasuGH(p2); + FfxFloat16x4 klhgB = FsrEasuBH(p2); + FfxFloat16x4 zzonR = FsrEasuRH(p3); + FfxFloat16x4 zzonG = FsrEasuGH(p3); + FfxFloat16x4 zzonB = FsrEasuBH(p3); + + FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG); + FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG); + FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG); + FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG); + FfxFloat16 bL = bczzL.x; + FfxFloat16 cL = bczzL.y; + FfxFloat16 iL = ijfeL.x; + FfxFloat16 jL = ijfeL.y; + FfxFloat16 fL = ijfeL.z; + FfxFloat16 eL = ijfeL.w; + FfxFloat16 kL = klhgL.x; + FfxFloat16 lL = klhgL.y; + FfxFloat16 hL = klhgL.z; + FfxFloat16 gL = klhgL.w; + FfxFloat16 oL = zzonL.z; + FfxFloat16 nL = zzonL.w; + + // This part is different, accumulating 2 taps in parallel. + FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 lenP = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxUInt32(true), + FfxUInt32(false), + FfxFloat16x2(bL, cL), + FfxFloat16x2(eL, fL), + FfxFloat16x2(fL, gL), + FfxFloat16x2(gL, hL), + FfxFloat16x2(jL, kL)); + FsrEasuSetH(dirPX, + dirPY, + lenP, + ppp, + FfxUInt32(false), + FfxUInt32(true), + FfxFloat16x2(fL, gL), + FfxFloat16x2(iL, jL), + FfxFloat16x2(jL, kL), + FfxFloat16x2(kL, lL), + FfxFloat16x2(nL, oL)); + FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g); + FfxFloat16 len = lenP.r + lenP.g; + + FfxFloat16x2 dir2 = dir * dir; + FfxFloat16 dirR = dir2.x + dir2.y; + FfxBoolean zro = FfxBoolean(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); + dirR = ffxApproximateReciprocalSquareRootHalf(dirR); + dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; + dir.x = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x; + dir *= FFX_BROADCAST_FLOAT16X2(dirR); + len = len * FFX_BROADCAST_FLOAT16(0.5); + len *= len; + FfxFloat16 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y))); + FfxFloat16x2 len2 = + FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len); + FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len; + FfxFloat16 clp = ffxApproximateReciprocalHalf(lob); + + // FP16 is different, using packed trick to do min and max in same operation. + FfxFloat16x2 bothR = + max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x))); + FfxFloat16x2 bothG = + max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x))); + FfxFloat16x2 bothB = + max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x))); + + // This part is different for FP16, working pairs of taps at a time. + FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0); + FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw); + FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw); + FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y); + FfxFloat16 aW = pW.x + pW.y; + + // Slightly different for FP16 version due to combined min and max. + pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW)))); +} +#endif // #if defined(FFX_GPU) && defined(FFX_HALF) && defined(FFX_FSR_EASU_HALF) + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING +// +//------------------------------------------------------------------------------------------------------------------------------ +// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness. +// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping. +// RCAS also has a built in process to limit sharpening of what it detects as possible noise. +// RCAS sharper does not support scaling, as it should be applied after EASU scaling. +// Pass EASU output straight into RCAS, no color conversions necessary. +//------------------------------------------------------------------------------------------------------------------------------ +// RCAS is based on the following logic. +// RCAS uses a 5 tap filter in a cross pattern (same as CAS), +// w n +// w 1 w for taps w m e +// w s +// Where 'w' is the negative lobe weight. +// output = (w*(n+e+w+s)+m)/(4*w+1) +// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range, +// 0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s) +// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) +// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. +// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. +// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps. +// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. +// This stabilizes RCAS. +// RCAS does a simple highpass which is normalized against the local contrast then shaped, +// 0.25 +// 0.25 -1 0.25 +// 0.25 +// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. +// +// GLSL example for the required callbacks : +// +// FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));} +// void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b) +// { +// //do any simple input color conversions here or leave empty if none needed +// } +// +// FsrRcasCon need to be called from the CPU or GPU to set up constants. +// Including a GPU example here, the 'con' value would be stored out to a constant buffer. +// +// FfxUInt32x4 con; +// FsrRcasCon(con, +// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. +// --------------- +// RCAS sharpening supports a CAS-like pass-through alpha via, +// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 +// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. +// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, +// #define FSR_RCAS_DENOISE 1 +//============================================================================================================================== +// This is set at the limit of providing unnatural results for sharpening. +#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// CONSTANT SETUP +//============================================================================================================================== +// Call to setup required constant values (works on CPU or GPU). + FFX_STATIC void FsrRcasCon(FfxUInt32x4 con, + // The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. + FfxFloat32 sharpness) + { + // Transform from stops to linear value. + sharpness = exp2(-sharpness); + FfxFloat32x2 hSharp = {sharpness, sharpness}; + con[0] = ffxAsUInt32(sharpness); + con[1] = packHalf2x16(hSharp); + con[2] = 0; + con[3] = 0; + } + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 32-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&&defined(FSR_RCAS_F) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p); + void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasF(out FfxFloat32 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat32 pixG, + out FfxFloat32 pixB, +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat32 pixA, +#endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con) + { // Constant generated by RcasSetup(). + // Algorithm uses minimal 3x3 pixel neighborhood. + // b + // d e f + // h + FfxInt32x2 sp = FfxInt32x2(ip); + FfxFloat32x3 b = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb; + FfxFloat32x3 d = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb; +#ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat32x4 ee = FsrRcasLoadF(sp); + FfxFloat32x3 e = ee.rgb; + pixA = ee.a; +#else + FfxFloat32x3 e = FsrRcasLoadF(sp).rgb; +#endif + FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb; + FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat32 bR = b.r; + FfxFloat32 bG = b.g; + FfxFloat32 bB = b.b; + FfxFloat32 dR = d.r; + FfxFloat32 dG = d.g; + FfxFloat32 dB = d.b; + FfxFloat32 eR = e.r; + FfxFloat32 eG = e.g; + FfxFloat32 eB = e.b; + FfxFloat32 fR = f.r; + FfxFloat32 fG = f.g; + FfxFloat32 fB = f.b; + FfxFloat32 hR = h.r; + FfxFloat32 hG = h.g; + FfxFloat32 hB = h.b; + // Run optional input transform. + FsrRcasInputF(bR, bG, bB); + FsrRcasInputF(dR, dG, dB); + FsrRcasInputF(eR, eG, eB); + FsrRcasInputF(fR, fG, fB); + FsrRcasInputF(hR, hG, hB); + // Luma times 2. + FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG); + FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG); + FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG); + FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG); + FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG); + // Noise detection. + FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL; + nz = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL))); + nz = FfxFloat32(-0.5) * nz + FfxFloat32(1.0); + // Min and max of ring. + FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR); + FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG); + FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB); + FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR); + FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG); + FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB); + // Immediate constants for peak range. + FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R); + FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G); + FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B); + FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y); + FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y); + FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y); + FfxFloat32 lobeR = max(-hitMinR, hitMaxR); + FfxFloat32 lobeG = max(-hitMinG, hitMaxG); + FfxFloat32 lobeB = max(-hitMinB, hitMaxB); + FfxFloat32 lobe = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat + (con.x); + // Apply noise removal. +#ifdef FSR_RCAS_DENOISE + lobe *= nz; +#endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); + pixR = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL; + pixG = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL; + pixB = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL; + return; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// NON-PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H) + // Input callback prototypes that need to be implemented by calling shader + FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p); + void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b); +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasH( + out FfxFloat16 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. + out FfxFloat16 pixG, + out FfxFloat16 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // Sharpening algorithm uses minimal 3x3 pixel neighborhood. + // b + // d e f + // h + FfxInt16x2 sp=FfxInt16x2(ip); + FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee=FsrRcasLoadH(sp); + FfxFloat16x3 e=ee.rgb;pixA=ee.a; + #else + FfxFloat16x3 e=FsrRcasLoadH(sp).rgb; + #endif + FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb; + // Rename (32-bit) or regroup (16-bit). + FfxFloat16 bR=b.r; + FfxFloat16 bG=b.g; + FfxFloat16 bB=b.b; + FfxFloat16 dR=d.r; + FfxFloat16 dG=d.g; + FfxFloat16 dB=d.b; + FfxFloat16 eR=e.r; + FfxFloat16 eG=e.g; + FfxFloat16 eB=e.b; + FfxFloat16 fR=f.r; + FfxFloat16 fG=f.g; + FfxFloat16 fB=f.b; + FfxFloat16 hR=h.r; + FfxFloat16 hG=h.g; + FfxFloat16 hB=h.b; + // Run optional input transform. + FsrRcasInputH(bR,bG,bB); + FsrRcasInputH(dR,dG,dB); + FsrRcasInputH(eR,eG,eB); + FsrRcasInputH(fR,fG,fB); + FsrRcasInputH(hR,hG,hB); + // Luma times 2. + FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG); + FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG); + FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG); + FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG); + FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG); + // Noise detection. + FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0); + // Min and max of ring. + FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R); + FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G); + FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B); + FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); + FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); + FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); + FfxFloat16 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x; + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; +} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// PACKED 16-BIT VERSION +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2) + // Input callback prototypes that need to be implemented by the calling shader + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p); + void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b); +//------------------------------------------------------------------------------------------------------------------------------ + // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store. + void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){ + #ifdef FFX_HLSL + // Invoke a slower path for DX only, since it won't allow uninitialized values. + pix0.a=pix1.a=0.0; + #endif + pix0.rgb=FfxFloat16x3(pixR.x,pixG.x,pixB.x); + pix1.rgb=FfxFloat16x3(pixR.y,pixG.y,pixB.y);} +//------------------------------------------------------------------------------------------------------------------------------ + void FsrRcasHx2( + // Output values are for 2 8x8 tiles in a 16x8 region. + // pix.x = left 8x8 tile + // pix.y = right 8x8 tile + // This enables later processing to easily be packed as well. + out FfxFloat16x2 pixR, + out FfxFloat16x2 pixG, + out FfxFloat16x2 pixB, + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + out FfxFloat16x2 pixA, + #endif + FfxUInt32x2 ip, // Integer pixel position in output. + FfxUInt32x4 con){ // Constant generated by RcasSetup(). + // No scaling algorithm uses minimal 3x3 pixel neighborhood. + FfxInt16x2 sp0=FfxInt16x2(ip); + FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0); + FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a; + #else + FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb; + #endif + FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb; + FfxInt16x2 sp1=sp0+FfxInt16x2(8,0); + FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb; + FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb; + #ifdef FSR_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1); + FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a; + #else + FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb; + #endif + FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb; + FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb; + // Arrays of Structures to Structures of Arrays conversion. + FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r); + FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g); + FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b); + FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r); + FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g); + FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b); + FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r); + FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g); + FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b); + FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r); + FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g); + FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b); + FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r); + FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g); + FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b); + // Run optional input transform. + FsrRcasInputHx2(bR,bG,bB); + FsrRcasInputHx2(dR,dG,dB); + FsrRcasInputHx2(eR,eG,eB); + FsrRcasInputHx2(fR,fG,fB); + FsrRcasInputHx2(hR,hG,hB); + // Luma times 2. + FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG); + FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG); + FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG); + FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG); + FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG); + // Noise detection. + FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL; + nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))); + nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0); + // Min and max of ring. + FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR); + FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG); + FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB); + FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR); + FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG); + FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB); + // Immediate constants for peak range. + FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); + // Limiters, these need to be high precision RCPs. + FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R); + FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G); + FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B); + FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); + FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); + FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y); + FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR); + FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG); + FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB); + FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x); + // Apply noise removal. + #ifdef FSR_RCAS_DENOISE + lobe*=nz; + #endif + // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. + FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0)); + pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; + pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; + pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR +// +//------------------------------------------------------------------------------------------------------------------------------ +// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. +// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. +// The 'Lfga*()' functions provide a convenient way to introduce grain. +// These functions limit grain based on distance to signal limits. +// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. +// Grain application should be done in a linear colorspace. +// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). +//------------------------------------------------------------------------------------------------------------------------------ +// Usage, +// FsrLfga*( +// color, // In/out linear colorspace color {0 to 1} ranged. +// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. +// amount); // Amount of grain (0 to 1} ranged. +//------------------------------------------------------------------------------------------------------------------------------ +// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)' +//============================================================================================================================== +#if defined(FFX_GPU) + // Maximum grain is the minimum distance to the signal limit. + void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) + { + c += (t * ffxBroadcast3(a)) * ffxMin(ffxBroadcast3(1.0) - c, c); + } +#endif +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 + // Half precision version (slower). + void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) + { + c += (t * FFX_BROADCAST_FLOAT16X3(a)) * min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c); + } + //------------------------------------------------------------------------------------------------------------------------------ + // Packed half precision version (faster). + void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a){ + cR+=(tR*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cR,cR);cG+=(tG*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cG,cG);cB+=(tB*FFX_BROADCAST_FLOAT16X2(a))*min(FFX_BROADCAST_FLOAT16X2(1.0)-cB,cB);} +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER +// +//------------------------------------------------------------------------------------------------------------------------------ +// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. +// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. +//------------------------------------------------------------------------------------------------------------------------------ +// Reversible tonemapper usage, +// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. +// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. +//============================================================================================================================== +#if defined(FFX_GPU) + void FsrSrtmF(inout FfxFloat32x3 c) + { + c *= ffxBroadcast3(rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); + } + // The extra max solves the c=1.0 case (which is a /0). + void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(rcp(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} +#endif +//============================================================================================================================== +#if defined(FFX_GPU )&& FFX_HALF == 1 + void FsrSrtmH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(ffxMax3Half(c.r, c.g, c.b) + FFX_BROADCAST_FLOAT16(1.0))); + } + void FsrSrtmInvH(inout FfxFloat16x3 c) + { + c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b)))); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp = ffxReciprocalHalf(ffxMax3Half(cR, cG, cB) + FFX_BROADCAST_FLOAT16X2(1.0)); + cR *= rcp; + cG *= rcp; + cB *= rcp; + } + void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB) + { + FfxFloat16x2 rcp=ffxReciprocalHalf(max(FFX_BROADCAST_FLOAT16X2(1.0/32768.0),FFX_BROADCAST_FLOAT16X2(1.0)-ffxMax3Half(cR,cG,cB))); + cR*=rcp; + cG*=rcp; + cB*=rcp; + } +#endif +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//_____________________________________________________________/\_______________________________________________________________ +//============================================================================================================================== +// +// FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER +// +//------------------------------------------------------------------------------------------------------------------------------ +// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. +// Gamma 2.0 is used so that the conversion back to linear is just to square the color. +// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. +// Given good non-biased temporal blue noise as dither input, +// the output dither will temporally conserve energy. +// This is done by choosing the linear nearest step point instead of perceptual nearest. +// See code below for details. +//------------------------------------------------------------------------------------------------------------------------------ +// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION +// =============================================== +// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'. +// - Thus rounding is to nearest. +// - NaN gets converted to zero. +// - INF is clamped to {0.0 to 1.0}. +//============================================================================================================================== +#if defined(FFX_GPU) + // Hand tuned integer position to dither value, with more values than simple checkerboard. + // Only 32-bit has enough precision for this compddation. + // Output is {0 to <1}. + FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32 x = FfxFloat32(p.x + f); + FfxFloat32 y = FfxFloat32(p.y); + // The 1.61803 golden ratio. + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + // Number designed to provide a good visual pattern. + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * a + (y * b); + return ffxFract(x); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 8-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit) + { + FfxFloat32x3 n = ffxSqrt(c); + n = floor(n * ffxBroadcast3(255.0)) * ffxBroadcast3(1.0 / 255.0); + FfxFloat32x3 a = n * n; + FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 255.0); + b = b * b; + // Ratio of 'a' to 'b' required to produce 'c'. + // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges). + // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL. + FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); + // Use the ratio as a cutoff to choose 'a' or 'b'. + // ffxIsGreaterThanZero() is a MUL. + c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + // This version is 10-bit gamma 2.0. + // The 'c' input is {0 to 1}. + // Output is {0 to 1} ready for image store. + void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit) + { + FfxFloat32x3 n = ffxSqrt(c); + n = floor(n * ffxBroadcast3(1023.0)) * ffxBroadcast3(1.0 / 1023.0); + FfxFloat32x3 a = n * n; + FfxFloat32x3 b = n + ffxBroadcast3(1.0 / 1023.0); + b = b * b; + FfxFloat32x3 r = (c - b) * ffxApproximateReciprocalMedium(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZero(ffxBroadcast3(dit) - r) * ffxBroadcast3(1.0 / 1023.0)); + } +#endif +//============================================================================================================================== +#if defined(FFX_GPU)&& FFX_HALF == 1 + FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32 x = FfxFloat32(p.x + f); + FfxFloat32 y = FfxFloat32(p.y); + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * a + (y * b); + return FfxFloat16(ffxFract(x)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit) + { + FfxFloat16x3 n = sqrt(c); + n = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); + FfxFloat16x3 a = n * n; + FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); + b = b * b; + FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit) + { + FfxFloat16x3 n = sqrt(c); + n = floor(n * FFX_BROADCAST_FLOAT16X3(1023.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); + FfxFloat16x3 a = n * n; + FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0); + b = b * b; + FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); + c = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0)); + } + //============================================================================================================================== + // This computes dither for positions 'p' and 'p+{8,0}'. + FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f) + { + FfxFloat32x2 x; + x.x = FfxFloat32(p.x + f); + x.y = x.x + FfxFloat32(8.0); + FfxFloat32 y = FfxFloat32(p.y); + FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); + FfxFloat32 b = FfxFloat32(1.0 / 3.69); + x = x * ffxBroadcast2(a) + ffxBroadcast2(y * b); + return FfxFloat16x2(ffxFract(x)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit) + { + FfxFloat16x2 nR = sqrt(cR); + FfxFloat16x2 nG = sqrt(cG); + FfxFloat16x2 nB = sqrt(cB); + nR = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + nG = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + nB = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + FfxFloat16x2 aR = nR * nR; + FfxFloat16x2 aG = nG * nG; + FfxFloat16x2 aB = nB * nB; + FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bR = bR * bR; + FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bG = bG * bG; + FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); + bB = bB * bB; + FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); + FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG); + FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB); + cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); + } + //------------------------------------------------------------------------------------------------------------------------------ + void FsrTepdC10Hx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 dit){ + FfxFloat16x2 nR=sqrt(cR); + FfxFloat16x2 nG=sqrt(cG); + FfxFloat16x2 nB=sqrt(cB); + nR=floor(nR*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + nG=floor(nG*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + nB=floor(nB*FFX_BROADCAST_FLOAT16X2(1023.0))*FFX_BROADCAST_FLOAT16X2(1.0/1023.0); + FfxFloat16x2 aR=nR*nR; + FfxFloat16x2 aG=nG*nG; + FfxFloat16x2 aB=nB*nB; + FfxFloat16x2 bR=nR+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bR=bR*bR; + FfxFloat16x2 bG=nG+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bG=bG*bG; + FfxFloat16x2 bB=nB+FFX_BROADCAST_FLOAT16X2(1.0/1023.0);bB=bB*bB; + FfxFloat16x2 rR=(cR-bR)*ffxApproximateReciprocalMediumHalf(aR-bR); + FfxFloat16x2 rG=(cG-bG)*ffxApproximateReciprocalMediumHalf(aG-bG); + FfxFloat16x2 rB=(cB-bB)*ffxApproximateReciprocalMediumHalf(aB-bB); + cR=ffxSaturate(nR+ffxIsGreaterThanZeroHalf(dit-rR)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cG=ffxSaturate(nG+ffxIsGreaterThanZeroHalf(dit-rG)*FFX_BROADCAST_FLOAT16X2(1.0/1023.0)); + cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0)); +} +#endif diff --git a/Assets/Shaders/FSR2/ffx_fsr1.h.meta b/Assets/Shaders/FSR2/ffx_fsr1.h.meta new file mode 100644 index 0000000..3c4a35e --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr1.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: b1ff8d458fc2ffe42a18cd4b71986e64 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h b/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h new file mode 100644 index 0000000..83fd286 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h @@ -0,0 +1,279 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_MIN16_F4 WrapDepthClipMask(FFX_MIN16_I2 iPxSample) +{ + return FFX_MIN16_F4(LoadDepthClip(iPxSample).r, 0, 0, 0); +} + +DeclareCustomFetchBilinearSamples(FetchDepthClipMaskSamples, WrapDepthClipMask) +DeclareCustomTextureSample(DepthClipMaskSample, Bilinear, FetchDepthClipMaskSamples) + +FFX_MIN16_F4 WrapTransparencyAndCompositionMask(FFX_MIN16_I2 iPxSample) +{ + return FFX_MIN16_F4(LoadTransparencyAndCompositionMask(iPxSample).r, 0, 0, 0); +} + +DeclareCustomFetchBilinearSamples(FetchTransparencyAndCompositionMaskSamples, WrapTransparencyAndCompositionMask) +DeclareCustomTextureSample(TransparencyAndCompositionMaskSample, Bilinear, FetchTransparencyAndCompositionMaskSamples) + +FfxFloat32x4 WrapLumaStabilityFactor(FFX_MIN16_I2 iPxSample) +{ + return FfxFloat32x4(LoadLumaStabilityFactor(iPxSample), 0, 0, 0); +} + +DeclareCustomFetchBilinearSamples(FetchLumaStabilitySamples, WrapLumaStabilityFactor) +DeclareCustomTextureSample(LumaStabilityFactorSample, Bilinear, FetchLumaStabilitySamples) + +FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) +{ + return length(fMotionVector * DisplaySize()); +} + +void Accumulate(FFX_MIN16_I2 iPxHrPos, FFX_PARAMETER_INOUT FfxFloat32x4 fHistory, FFX_PARAMETER_IN FfxFloat32x4 fUpsampled, FFX_PARAMETER_IN FfxFloat32 fDepthClipFactor, FFX_PARAMETER_IN FfxFloat32 fHrVelocity) +{ + + fHistory.w = fHistory.w + fUpsampled.w; + + fUpsampled.rgb = YCoCgToRGB(fUpsampled.rgb); + + const FfxFloat32 fAlpha = fUpsampled.w / fHistory.w; + fHistory.rgb = ffxLerp(fHistory.rgb, fUpsampled.rgb, fAlpha); + + FfxFloat32 fMaxAverageWeight = ffxLerp(MaxAccumulationWeight(), accumulationMaxOnMotion, ffxSaturate(fHrVelocity * 10.0f)); + fHistory.w = ffxMin(fHistory.w, fMaxAverageWeight); +} + +void RectifyHistory( + RectificationBoxData clippingBox, + inout FfxFloat32x4 fHistory, + FFX_PARAMETER_IN LOCK_STATUS_T fLockStatus, + FFX_PARAMETER_IN UPSAMPLE_F fDepthClipFactor, + FFX_PARAMETER_IN UPSAMPLE_F fLumaStabilityFactor, + FFX_PARAMETER_IN UPSAMPLE_F fLuminanceDiff, + FFX_PARAMETER_IN UPSAMPLE_F fUpsampleWeight, + FFX_PARAMETER_IN FfxFloat32 fLockContributionThisFrame) +{ + UPSAMPLE_F fScaleFactorInfluence = UPSAMPLE_F(1.0f / DownscaleFactor().x - 1); + UPSAMPLE_F fBoxScale = UPSAMPLE_F(1.0f) + (UPSAMPLE_F(0.5f) * fScaleFactorInfluence); + + FFX_MIN16_F3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; + UPSAMPLE_F3 boxMin = clippingBox.boxCenter - fScaledBoxVec; + UPSAMPLE_F3 boxMax = clippingBox.boxCenter + fScaledBoxVec; + UPSAMPLE_F3 boxCenter = clippingBox.boxCenter; + UPSAMPLE_F boxVecSize = length(clippingBox.boxVec); + + boxMin = ffxMax(clippingBox.aabbMin, boxMin); + boxMax = ffxMin(clippingBox.aabbMax, boxMax); + + UPSAMPLE_F3 distToClampOutside = UPSAMPLE_F3(ffxMax(ffxMax(UPSAMPLE_F3_BROADCAST(0.0f), boxMin - UPSAMPLE_F3(fHistory.xyz)), ffxMax(UPSAMPLE_F3_BROADCAST(0.0f), UPSAMPLE_F3(fHistory.xyz) - boxMax))); + + if (any(FFX_GREATER_THAN(distToClampOutside, UPSAMPLE_F3_BROADCAST(0.0f)))) { + + const UPSAMPLE_F3 clampedHistorySample = clamp(UPSAMPLE_F3(fHistory.xyz), boxMin, boxMax); + + UPSAMPLE_F3 clippedHistoryToBoxCenter = abs(clampedHistorySample - boxCenter); + UPSAMPLE_F3 historyToBoxCenter = abs(UPSAMPLE_F3(fHistory.xyz) - boxCenter); + UPSAMPLE_F3 HistoryColorWeight; + HistoryColorWeight.x = historyToBoxCenter.x > UPSAMPLE_F(0) ? clippedHistoryToBoxCenter.x / historyToBoxCenter.x : UPSAMPLE_F(0.0f); + HistoryColorWeight.y = historyToBoxCenter.y > UPSAMPLE_F(0) ? clippedHistoryToBoxCenter.y / historyToBoxCenter.y : UPSAMPLE_F(0.0f); + HistoryColorWeight.z = historyToBoxCenter.z > UPSAMPLE_F(0) ? clippedHistoryToBoxCenter.z / historyToBoxCenter.z : UPSAMPLE_F(0.0f); + + UPSAMPLE_F3 fHistoryContribution = HistoryColorWeight; + + // only lock luma + fHistoryContribution += UPSAMPLE_F3_BROADCAST(ffxMax(UPSAMPLE_F(fLockContributionThisFrame), fLumaStabilityFactor)); + fHistoryContribution *= (fDepthClipFactor * fDepthClipFactor); + + fHistory.xyz = FfxFloat32x3(ffxLerp(clampedHistorySample.xyz, fHistory.xyz, ffxSaturate(fHistoryContribution))); + } +} + +void WriteUpscaledOutput(FFX_MIN16_I2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(iPxHrPos, fUpscaledColor); +} + +FfxFloat32 GetLumaStabilityFactor(FfxFloat32x2 fHrUv, FfxFloat32 fHrVelocity) +{ + FfxFloat32 fLumaStabilityFactor = SampleLumaStabilityFactor(fHrUv); + + // Only apply on still, have to reproject luma history resource if we want it to work on motion + fLumaStabilityFactor *= FfxFloat32(fHrVelocity < 0.1f); + + return fLumaStabilityFactor; +} + +FfxFloat32 GetLockContributionThisFrame(FfxFloat32x2 fUvCoord, FfxFloat32 fAccumulationMask, FfxFloat32 fParticleMask, LOCK_STATUS_T fLockStatus) +{ + const UPSAMPLE_F fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + + // Rectify on lock frame + FfxFloat32 fLockContributionThisFrame = ffxSaturate(fNormalizedLockLifetime * UPSAMPLE_F(4)); + + fLockContributionThisFrame *= (1.0f - fParticleMask); + //Take down contribution in transparent areas + fLockContributionThisFrame *= FfxFloat32(fAccumulationMask.r > 0.1f); + + return fLockContributionThisFrame; +} + +void FinalizeLockStatus(FFX_MIN16_I2 iPxHrPos, LOCK_STATUS_T fLockStatus, FfxFloat32 fUpsampledWeight) +{ + // Increase trust + const UPSAMPLE_F fTrustIncreaseLanczosMax = UPSAMPLE_F(12); // same increase no matter the MaxAccumulationWeight() value. + const UPSAMPLE_F fTrustIncrease = UPSAMPLE_F(fUpsampledWeight / fTrustIncreaseLanczosMax); + fLockStatus[LOCK_TRUST] = ffxMin(LOCK_STATUS_F1(1), fLockStatus[LOCK_TRUST] + fTrustIncrease); + + // Decrease lock lifetime + const UPSAMPLE_F fLifetimeDecreaseLanczosMax = UPSAMPLE_F(JitterSequenceLength()) * UPSAMPLE_F(averageLanczosWeightPerFrame); + const UPSAMPLE_F fLifetimeDecrease = UPSAMPLE_F(fUpsampledWeight / fLifetimeDecreaseLanczosMax); + fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(LOCK_STATUS_F1(0), fLockStatus[LOCK_LIFETIME_REMAINING] - fLifetimeDecrease); + + StoreLockStatus(iPxHrPos, fLockStatus); +} + +UPSAMPLE_F ComputeMaxAccumulationWeight(UPSAMPLE_F fHrVelocity, UPSAMPLE_F fReactiveMax, UPSAMPLE_F fDepthClipFactor, UPSAMPLE_F fLuminanceDiff, LockState lockState) { + + UPSAMPLE_F normalizedMinimum = UPSAMPLE_F(accumulationMaxOnMotion) / UPSAMPLE_F(MaxAccumulationWeight()); + + UPSAMPLE_F fReactiveMaxAccumulationWeight = UPSAMPLE_F(1) - fReactiveMax; + UPSAMPLE_F fMotionMaxAccumulationWeight = ffxLerp(UPSAMPLE_F(1), normalizedMinimum, ffxSaturate(fHrVelocity * UPSAMPLE_F(10))); + UPSAMPLE_F fDepthClipMaxAccumulationWeight = fDepthClipFactor; + + UPSAMPLE_F fLuminanceDiffMaxAccumulationWeight = ffxSaturate(ffxMax(normalizedMinimum, UPSAMPLE_F(1) - fLuminanceDiff)); + + UPSAMPLE_F maxAccumulation = UPSAMPLE_F(MaxAccumulationWeight()) * ffxMin( + ffxMin(fReactiveMaxAccumulationWeight, fMotionMaxAccumulationWeight), + ffxMin(fDepthClipMaxAccumulationWeight, fLuminanceDiffMaxAccumulationWeight) + ); + + return (lockState.NewLock && !lockState.WasLockedPrevFrame) ? UPSAMPLE_F(accumulationMaxOnMotion) : maxAccumulation; +} + +UPSAMPLE_F2 ComputeKernelWeight(in UPSAMPLE_F fHistoryWeight, in UPSAMPLE_F fDepthClipFactor, in UPSAMPLE_F fReactivityFactor) { + UPSAMPLE_F fKernelSizeBias = ffxSaturate(ffxMax(UPSAMPLE_F(0), fHistoryWeight - UPSAMPLE_F(0.5)) / UPSAMPLE_F(3)); + + //high bias on disocclusions + + UPSAMPLE_F fOneMinusReactiveMax = UPSAMPLE_F(1) - fReactivityFactor; + UPSAMPLE_F2 fKernelWeight = UPSAMPLE_F(1) + (UPSAMPLE_F(1.0f) / UPSAMPLE_F2(DownscaleFactor()) - UPSAMPLE_F(1)) * UPSAMPLE_F(fKernelSizeBias) * fOneMinusReactiveMax; + + //average value on disocclusion, to help decrease high value sample importance wait for accumulation to kick in + fKernelWeight *= FFX_BROADCAST_MIN_FLOAT16X2(UPSAMPLE_F(0.5) + fDepthClipFactor * UPSAMPLE_F(0.5)); + + return ffxMin(FFX_BROADCAST_MIN_FLOAT16X2(1.99), fKernelWeight); +} + +void Accumulate(FFX_MIN16_I2 iPxHrPos) +{ + const FfxFloat32x2 fSamplePosHr = iPxHrPos + 0.5f; + const FfxFloat32x2 fPxLrPos = fSamplePosHr * DownscaleFactor(); // Source resolution output pixel center position + const FfxInt32x2 iPxLrPos = FfxInt32x2(floor(fPxLrPos)); // TODO: what about weird upscale factors... + + const FfxFloat32x2 fSamplePosUnjitterLr = (FfxFloat32x2(iPxLrPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + + const FfxFloat32x2 fLrUvJittered = (fPxLrPos + Jitter()) / RenderSize(); + + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); + const FfxFloat32x2 fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + + const FfxFloat32 fHrVelocity = GetPxHrVelocity(fMotionVector); + const FfxFloat32 fDepthClipFactor = ffxSaturate(SampleDepthClip(fLrUvJittered)); + const FfxFloat32 fLumaStabilityFactor = GetLumaStabilityFactor(fHrUv, fHrVelocity); + const FfxFloat32 fAccumulationMask = 1.0f - TransparencyAndCompositionMaskSample(fLrUvJittered, RenderSize()).r; + + FfxInt32x2 offsetTL; + offsetTL.x = (fSamplePosUnjitterLr.x > fPxLrPos.x) ? FfxInt32(0) : FfxInt32(1); + offsetTL.y = (fSamplePosUnjitterLr.y > fPxLrPos.y) ? FfxInt32(0) : FfxInt32(1); + + const UPSAMPLE_F fReactiveMax = UPSAMPLE_F(1) - Pow3(UPSAMPLE_F(1) - LoadReactiveMax(FFX_MIN16_I2(iPxLrPos + offsetTL))); + + FfxFloat32x4 fHistoryColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + LOCK_STATUS_T fLockStatus = CreateNewLockSample(); + FfxBoolean bIsExistingSample = FFX_TRUE; + + FfxFloat32x2 fReprojectedHrUv = FfxFloat32x2(0, 0); + ComputeReprojectedUVs(iPxHrPos, fMotionVector, fReprojectedHrUv, bIsExistingSample); + + if (bIsExistingSample) { + ReprojectHistoryColor(iPxHrPos, fReprojectedHrUv, fHistoryColorAndWeight); + ReprojectHistoryLockStatus(iPxHrPos, fReprojectedHrUv, fLockStatus); + } + + FFX_MIN16_F fLuminanceDiff = FFX_MIN16_F(0.0f); + + LockState lockState = PostProcessLockStatus(iPxHrPos, fLrUvJittered, FFX_MIN16_F(fDepthClipFactor), fHrVelocity, fHistoryColorAndWeight.w, fLockStatus, fLuminanceDiff); + + fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, ComputeMaxAccumulationWeight( + UPSAMPLE_F(fHrVelocity), fReactiveMax, UPSAMPLE_F(fDepthClipFactor), UPSAMPLE_F(fLuminanceDiff), lockState + )); + + const UPSAMPLE_F fNormalizedLockLifetime = GetNormalizedRemainingLockLifetime(fLockStatus); + + // Kill accumulation based on shading change + fHistoryColorAndWeight.w = ffxMin(fHistoryColorAndWeight.w, FFX_MIN16_F(ffxMax(0.0f, MaxAccumulationWeight() * ffxPow(UPSAMPLE_F(1) - fLuminanceDiff, 2.0f / 1.0f)))); + + // Load upsampled input color + RectificationBoxData clippingBox; + + FfxFloat32 fKernelBias = fAccumulationMask * ffxSaturate(ffxMax(0.0f, fHistoryColorAndWeight.w - 0.5f) / 3.0f); + + FfxFloat32 fReactiveWeighted = 0; + + // No trust in reactive areas + fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], LOCK_STATUS_F1(1.0f) - LOCK_STATUS_F1(pow(fReactiveMax, 1.0f / 3.0f))); + fLockStatus[LOCK_TRUST] = ffxMin(fLockStatus[LOCK_TRUST], LOCK_STATUS_F1(fDepthClipFactor)); + + UPSAMPLE_F2 fKernelWeight = ComputeKernelWeight(UPSAMPLE_F(fHistoryColorAndWeight.w), UPSAMPLE_F(fDepthClipFactor), ffxMax((UPSAMPLE_F(1) - fLockStatus[LOCK_TRUST]), fReactiveMax)); + + UPSAMPLE_F4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(iPxHrPos, fKernelWeight, clippingBox); + + + FfxFloat32 fLockContributionThisFrame = GetLockContributionThisFrame(fHrUv, fAccumulationMask, fReactiveMax, fLockStatus); + + // Update accumulation and rectify history + if (fHistoryColorAndWeight.w > 0.0f) { + + RectifyHistory(clippingBox, fHistoryColorAndWeight, fLockStatus, UPSAMPLE_F(fDepthClipFactor), UPSAMPLE_F(fLumaStabilityFactor), UPSAMPLE_F(fLuminanceDiff), fUpsampledColorAndWeight.w, fLockContributionThisFrame); + + fHistoryColorAndWeight.rgb = YCoCgToRGB(fHistoryColorAndWeight.rgb); + } + + Accumulate(iPxHrPos, fHistoryColorAndWeight, fUpsampledColorAndWeight, fDepthClipFactor, fHrVelocity); + + //Subtract accumulation weight in reactive areas + fHistoryColorAndWeight.w -= FfxFloat32(fUpsampledColorAndWeight.w * fReactiveMax); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fHistoryColorAndWeight.rgb = InverseTonemap(fHistoryColorAndWeight.rgb); +#endif + fHistoryColorAndWeight.rgb /= Exposure(); + + FinalizeLockStatus(iPxHrPos, fLockStatus, fUpsampledColorAndWeight.w); + + StoreInternalColorAndWeight(iPxHrPos, fHistoryColorAndWeight); + + // Output final color when RCAS is disabled +#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0 + WriteUpscaledOutput(iPxHrPos, fHistoryColorAndWeight.rgb); +#endif +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h.meta new file mode 100644 index 0000000..77a8899 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_accumulate.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 83c949d45c1701541a93a43af9bb13fc +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl new file mode 100644 index 0000000..d66b075 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl @@ -0,0 +1,90 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 5 +// SRV 4 : FSR2_Exposure : r_exposure +// SRV 6 : m_UpscaleTransparencyAndComposition : r_transparency_and_composition_mask +// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors +// SRV 10 : FSR2_InternalUpscaled2 : r_internal_upscaled_color +// SRV 11 : FSR2_LockStatus2 : r_lock_status +// SRV 12 : FSR2_DepthClip : r_depth_clip +// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color +// SRV 14 : FSR2_LumaHistory : r_luma_history +// SRV 16 : FSR2_LanczosLutData : r_lanczos_lut +// SRV 26 : FSR2_MaximumUpsampleBias : r_upsample_maximum_bias_lut +// SRV 27 : FSR2_ReactiveMaskMax : r_reactive_max +// SRV 28 : FSR2_ExposureMips : r_imgMips +// UAV 10 : FSR2_InternalUpscaled1 : rw_internal_upscaled_color +// UAV 11 : FSR2_LockStatus1 : rw_lock_status +// UAV 18 : DisplayOutput : rw_upscaled_output +// CB 0 : cbFSR2 +// CB 1 : FSR2DispatchOffsets + +#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 1 +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR2_BIND_SRV_LOCK_STATUS 4 +#define FSR2_BIND_SRV_DEPTH_CLIP 5 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 +#define FSR2_BIND_SRV_LUMA_HISTORY 7 +#define FSR2_BIND_SRV_LANCZOS_LUT 8 +#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT 9 +#define FSR2_BIND_SRV_REACTIVE_MAX 10 +#define FSR2_BIND_SRV_EXPOSURE_MIPS 11 +#define FSR2_BIND_UAV_INTERNAL_UPSCALED 0 +#define FSR2_BIND_UAV_LOCK_STATUS 1 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 + +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" +#include "ffx_fsr2_sample.h" +#include "ffx_fsr2_upsample.h" +#include "ffx_fsr2_postprocess_lock_status.h" +#include "ffx_fsr2_reproject.h" +#include "ffx_fsr2_accumulate.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT; + uGroupId.y = GroupRows - uGroupId.y - 1; + + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + Accumulate(min16int2(uDispatchThreadId)); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl.meta new file mode 100644 index 0000000..39a351a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_accumulate_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 23360fd4b08e817458bc8523b94c47e0 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute b/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute new file mode 100644 index 0000000..5ec0581 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute @@ -0,0 +1,87 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_PRE_ALPHA_COLOR 0 +#define FSR2_BIND_SRV_POST_ALPHA_COLOR 1 +#define FSR2_BIND_UAV_REACTIVE 0 +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.cginc" +#include "ffx_fsr2_common.cginc" + +#pragma kernel CS + +Texture2D r_input_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PRE_ALPHA_COLOR); +Texture2D r_input_color_post_alpha : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_POST_ALPHA_COLOR); +RWTexture2D rw_output_reactive_mask : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_REACTIVE); + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +cbuffer cbGenerateReactive : register(b0) +{ + float scale; + float threshold; + uint flags; + float _padding_; +}; + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + float3 ColorPreAlpha = r_input_color_pre_alpha[uDispatchThreadId].rgb; + float3 ColorPostAlpha = r_input_color_post_alpha[uDispatchThreadId].rgb; + + if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) + { + ColorPreAlpha = Tonemap(ColorPreAlpha); + ColorPostAlpha = Tonemap(ColorPostAlpha); + } + + if (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) + { + ColorPreAlpha = InverseTonemap(ColorPreAlpha); + ColorPostAlpha = InverseTonemap(ColorPostAlpha); + } + + float out_reactive_value = 0.f; + float3 delta = abs(ColorPostAlpha - ColorPreAlpha); + + out_reactive_value = (flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= scale; + + out_reactive_value = (flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD) ? (out_reactive_value < threshold ? 0 : 1) : out_reactive_value; + + rw_output_reactive_mask[uDispatchThreadId] = out_reactive_value; +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute.meta b/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute.meta new file mode 100644 index 0000000..a7de3d9 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_autogen_reactive_pass.compute.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 912357a9240b1ef47a19dbddd659582f +ComputeShaderImporter: + externalObjects: {} + preprocessorOverride: 0 + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc b/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc new file mode 100644 index 0000000..9160528 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc @@ -0,0 +1,933 @@ +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_DEPTH' with 'defined (FSR2_BIND_SRV_DEPTH)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_DEPTH_CLIP' with 'defined (FSR2_BIND_SRV_DEPTH_CLIP)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_DILATED_DEPTH' with 'defined (FSR2_BIND_SRV_DILATED_DEPTH)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS' with 'defined (FSR2_BIND_SRV_DILATED_MOTION_VECTORS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_EXPOSURE' with 'defined (FSR2_BIND_SRV_EXPOSURE)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_EXPOSURE_MIPS' with 'defined (FSR2_BIND_SRV_EXPOSURE_MIPS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_INPUT_COLOR' with 'defined (FSR2_BIND_SRV_INPUT_COLOR)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_INTERNAL_UPSCALED' with 'defined (FSR2_BIND_SRV_INTERNAL_UPSCALED)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_LANCZOS_LUT' with 'defined (FSR2_BIND_SRV_LANCZOS_LUT)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_LOCK_STATUS' with 'defined (FSR2_BIND_SRV_LOCK_STATUS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_LUMA_HISTORY' with 'defined (FSR2_BIND_SRV_LUMA_HISTORY)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_MOTION_VECTORS' with 'defined (FSR2_BIND_SRV_MOTION_VECTORS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR' with 'defined (FSR2_BIND_SRV_PREPARED_INPUT_COLOR)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_RCAS_INPUT' with 'defined (FSR2_BIND_SRV_RCAS_INPUT)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_REACTIVE_MASK' with 'defined (FSR2_BIND_SRV_REACTIVE_MASK)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_REACTIVE_MAX' with 'defined (FSR2_BIND_SRV_REACTIVE_MAX)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH' with 'defined (FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK' with 'defined (FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT' with 'defined (FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_DEPTH_CLIP' with 'defined (FSR2_BIND_UAV_DEPTH_CLIP)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_DILATED_DEPTH' with 'defined (FSR2_BIND_UAV_DILATED_DEPTH)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS' with 'defined (FSR2_BIND_UAV_DILATED_MOTION_VECTORS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_EXPOSURE' with 'defined (FSR2_BIND_UAV_EXPOSURE)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_EXPOSURE_MIP_5' with 'defined (FSR2_BIND_UAV_EXPOSURE_MIP_5)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE' with 'defined (FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_INTERNAL_UPSCALED' with 'defined (FSR2_BIND_UAV_INTERNAL_UPSCALED)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_LOCK_STATUS' with 'defined (FSR2_BIND_UAV_LOCK_STATUS)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_LUMA_HISTORY' with 'defined (FSR2_BIND_UAV_LUMA_HISTORY)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR' with 'defined (FSR2_BIND_UAV_PREPARED_INPUT_COLOR)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_REACTIVE_MASK_MAX' with 'defined (FSR2_BIND_UAV_REACTIVE_MASK_MAX)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH' with 'defined (FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC' with 'defined (FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC)' +// Upgrade NOTE: replaced 'defined FSR2_BIND_UAV_UPSCALED_OUTPUT' with 'defined (FSR2_BIND_UAV_UPSCALED_OUTPUT)' + +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "ffx_fsr2_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#ifndef FFX_FSR2_PREFER_WAVE64 +#define FFX_FSR2_PREFER_WAVE64 +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#pragma warning(disable: 3205) // conversion from larger type to smaller +#endif // #if defined(FFX_GPU) + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_FSR2_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FSR2_BIND_CB_FSR2) + cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) + { + + FfxInt32x2 iRenderSize; + FfxInt32x2 iDisplaySize; + FfxUInt32x2 uLumaMipDimensions; + FfxUInt32 uLumaMipLevelToUse; + FfxUInt32 uFrameIndex; + FfxFloat32x2 fDisplaySizeRcp; + FfxFloat32x2 fJitter; + FfxFloat32x4 fDeviceToViewDepth; + FfxFloat32x2 depthclip_uv_scale; + FfxFloat32x2 postprocessed_lockstatus_uv_scale; + FfxFloat32x2 reactive_mask_dim_rcp; + FfxFloat32x2 MotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + FfxFloat32 fPreExposure; + FfxFloat32 fTanHalfFOV; + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fJitterSequenceLength; + FfxFloat32 fLockInitialLifetime; + FfxFloat32 fLockTickDelta; + FfxFloat32 fDeltaTime; + FfxFloat32 fDynamicResChangeFactor; + FfxFloat32 fLumaMipRcp; +#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 36 // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. + }; +#else + #define iRenderSize 0 + #define iDisplaySize 0 + #define uLumaMipDimensions 0 + #define uLumaMipLevelToUse 0 + #define uFrameIndex 0 + #define fDisplaySizeRcp 0 + #define fJitter 0 + #define fDeviceToViewDepth FfxFloat32x4(0,0,0,0) + #define depthclip_uv_scale 0 + #define postprocessed_lockstatus_uv_scale 0 + #define reactive_mask_dim_rcp 0 + #define MotionVectorScale 0 + #define fDownscaleFactor 0 + #define fPreExposure 0 + #define fTanHalfFOV 0 + #define fMotionVectorJitterCancellation 0 + #define fJitterSequenceLength 0 + #define fLockInitialLifetime 0 + #define fLockTickDelta 0 + #define fDeltaTime 0 + #define fDynamicResChangeFactor 0 + #define fLumaMipRcp 0 +#endif + +#if defined(FFX_GPU) +#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p) +#define FFX_FSR2_ROOTSIG_STR(p) #p +#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. + +#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \ + "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] +#if defined(FFX_FSR2_EMBED_ROOTSIG) +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG +#else +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +#endif // #if FFX_FSR2_EMBED_ROOTSIG +#endif // #if defined(FFX_GPU) + + +FfxFloat32 LumaMipRcp() +{ + return fLumaMipRcp; +} + +uint2 LumaMipDimensions() +{ + return uLumaMipDimensions; +} + +FfxUInt32 LumaMipLevelToUse() +{ + return uLumaMipLevelToUse; +} + +FfxFloat32x2 DownscaleFactor() +{ + return fDownscaleFactor; +} + +FfxFloat32x2 Jitter() +{ + return fJitter; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return fMotionVectorJitterCancellation; +} + +int2 RenderSize() +{ + return iRenderSize; +} + +int2 DisplaySize() +{ + return iDisplaySize; +} + +FfxFloat32x2 DisplaySizeRcp() +{ + return fDisplaySizeRcp; +} + +FfxFloat32 JitterSequenceLength() +{ + return fJitterSequenceLength; +} + +FfxFloat32 LockInitialLifetime() +{ + return fLockInitialLifetime; +} + +FfxFloat32 LockTickDelta() +{ + return fLockTickDelta; +} + +FfxFloat32 DeltaTime() +{ + return fDeltaTime; +} + +FfxFloat32 MaxAccumulationWeight() +{ + const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples + + return 12; //32.0f * averageLanczosWeightPerFrame; +} + +FfxFloat32 DynamicResChangeFactor() +{ + return fDynamicResChangeFactor; +} + +FfxUInt32 FrameIndex() +{ + return uFrameIndex; +} + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + + +typedef FFX_MIN16_F4 PREPARED_INPUT_COLOR_T; +typedef FFX_MIN16_F3 PREPARED_INPUT_COLOR_F3; +typedef FFX_MIN16_F PREPARED_INPUT_COLOR_F1; + +typedef FfxFloat32x3 UPSAMPLED_COLOR_T; + +#define RW_UPSAMPLED_WEIGHT_T FfxFloat32 + +typedef FFX_MIN16_F3 LOCK_STATUS_T; +typedef FFX_MIN16_F LOCK_STATUS_F1; + +// SRVs +#if defined(FFX_INTERNAL) + Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); + Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); + Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); + Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); + Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); + Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); + Texture2D r_ReconstructedPrevNearestDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); + Texture2D r_reactive_max : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_REACTIVE_MAX); + + // declarations not current form, no accessor functions + Texture2D r_transparency_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_TRANSPARENCY_MASK); + Texture2D r_bias_current_color_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_BIAS_CURRENT_COLOR_MASK); + Texture2D r_gbuffer_albedo : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ALBEDO); + Texture2D r_gbuffer_roughness : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_ROUGHNESS); + Texture2D r_gbuffer_metallic : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_METALLIC); + Texture2D r_gbuffer_specular : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SPECULAR); + Texture2D r_gbuffer_subsurface : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SUBSURFACE); + Texture2D r_gbuffer_normals : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_NORMALS); + Texture2D r_gbuffer_shading_mode_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_SHADING_MODE_ID); + Texture2D r_gbuffer_material_id : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_GBUFFER_MATERIAL_ID); + Texture2D r_motion_vectors_3d : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_3D); + Texture2D r_is_particle_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_IS_PARTICLE_MASK); + Texture2D r_animated_texture_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_ANIMATED_TEXTURE_MASK); + Texture2D r_depth_high_res : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_HIGH_RES); + Texture2D r_position_view_space : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_POSITION_VIEW_SPACE); + Texture2D r_ray_tracing_hit_distance : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RAY_TRACING_HIT_DISTANCE); + Texture2D r_motion_vectors_reflection : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_VELOCITY_REFLECTION); + + // UAV declarations + RWTexture2D rw_ReconstructedPrevNearestDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); + RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP); + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); + RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); + //globallycoherent RWTexture2D rw_imgMipmap[13] : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE); + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5); + RWTexture2D rw_reactive_max : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_REACTIVE_MAX); + RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE); + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); + RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); + +#else // #if defined(FFX_INTERNAL) + #if defined (FSR2_BIND_SRV_INPUT_COLOR) + Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); + #endif + #if defined (FSR2_BIND_SRV_MOTION_VECTORS) + Texture2D r_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_MOTION_VECTORS); + #endif + #if defined (FSR2_BIND_SRV_DEPTH) + Texture2D r_depth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH); + #endif + #if defined (FSR2_BIND_SRV_EXPOSURE) + Texture2D r_exposure : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE); + #endif + #if defined (FSR2_BIND_SRV_REACTIVE_MASK) + Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK); + #endif + #if defined (FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) + Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + #endif + #if defined (FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) + Texture2D r_ReconstructedPrevNearestDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined (FSR2_BIND_SRV_DILATED_MOTION_VECTORS) + Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS); + #endif + #if defined (FSR2_BIND_SRV_DILATED_DEPTH) + Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH); + #endif + #if defined (FSR2_BIND_SRV_INTERNAL_UPSCALED) + Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED); + #endif + #if defined (FSR2_BIND_SRV_LOCK_STATUS) + #if FFX_COMPILE_FOR_SPIRV + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + #else + Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS); + #endif + #endif + #if defined (FSR2_BIND_SRV_DEPTH_CLIP) + Texture2D r_depth_clip : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DEPTH_CLIP); + #endif + #if defined (FSR2_BIND_SRV_PREPARED_INPUT_COLOR) + #if FFX_COMPILE_FOR_SPIRV + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + #else + Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR); + #endif + #endif + #if defined (FSR2_BIND_SRV_LUMA_HISTORY) + Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY); + #endif + #if defined (FSR2_BIND_SRV_RCAS_INPUT) + Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT); + #endif + #if defined (FSR2_BIND_SRV_LANCZOS_LUT) + Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT); + #endif + #if defined (FSR2_BIND_SRV_EXPOSURE_MIPS) + Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_EXPOSURE_MIPS); + #endif + #if defined (FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) + Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT); + #endif + #if defined (FSR2_BIND_SRV_REACTIVE_MAX) + Texture2D r_reactive_max : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MAX); + #endif + + // UAV declarations + #if defined (FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) + RWTexture2D rw_ReconstructedPrevNearestDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + #endif + #if defined (FSR2_BIND_UAV_DILATED_MOTION_VECTORS) + RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS); + #endif + #if defined (FSR2_BIND_UAV_DILATED_DEPTH) + RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH); + #endif + #if defined (FSR2_BIND_UAV_INTERNAL_UPSCALED) + RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED); + #endif + #if defined (FSR2_BIND_UAV_LOCK_STATUS) + #if FFX_COMPILE_FOR_SPIRV + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + #else + RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS); + #endif + #endif + #if defined (FSR2_BIND_UAV_DEPTH_CLIP) + RWTexture2D rw_depth_clip : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DEPTH_CLIP); + #endif + #if defined (FSR2_BIND_UAV_PREPARED_INPUT_COLOR) + #if FFX_COMPILE_FOR_SPIRV + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + #else + RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR); + #endif + #endif + #if defined (FSR2_BIND_UAV_LUMA_HISTORY) + RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY); + #endif + #if defined (FSR2_BIND_UAV_UPSCALED_OUTPUT) + RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT); + #endif + #if defined (FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) + globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE); + #endif + #if defined (FSR2_BIND_UAV_EXPOSURE_MIP_5) + globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5); + #endif + #if defined (FSR2_BIND_UAV_REACTIVE_MASK_MAX) + RWTexture2D rw_reactive_max : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_REACTIVE_MASK_MAX); + #endif + #if defined (FSR2_BIND_UAV_EXPOSURE) + RWTexture2D rw_exposure : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE); + #endif + #if defined (FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC) + globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC); + #endif +#endif // #if defined(FFX_INTERNAL) + +FfxFloat32 LoadMipLuma(FFX_MIN16_I2 iPxPos, FfxUInt32 mipLevel) +{ +#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) + return r_imgMips.mips[mipLevel][iPxPos]; +#else + return 0.f; +#endif +} +#if FFX_HALF +FfxFloat16 LoadMipLuma(FfxInt16x2 iPxPos, FfxUInt16 mipLevel) +{ +#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) + return r_imgMips.mips[mipLevel][iPxPos]; +#else + return 0.f; +#endif +} +#endif +FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ +#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) + fUV *= depthclip_uv_scale; + return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); +#else + return 0.f; +#endif + +} +#if FFX_HALF +FfxFloat16 SampleMipLuma(FfxFloat16x2 fUV, FfxUInt32 mipLevel) +{ +#if defined(FSR2_BIND_SRV_EXPOSURE_MIPS) || defined(FFX_INTERNAL) + fUV *= FfxFloat16x2(depthclip_uv_scale); + return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); +#else + return 0.f; +#endif +} +#endif + +// +// a 0 0 0 x +// 0 b 0 0 y +// 0 0 c d z +// 0 0 e 0 1 +// +// z' = (z*c+d)/(z*e) +// z' = (c/e) + d/(z*e) +// z' - (c/e) = d/(z*e) +// (z'e - c)/e = d/(z*e) +// e / (z'e - c) = (z*e)/d +// (e * d) / (z'e - c) = z*e +// z = d / (z'e - c) +FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth) +{ + return -fDeviceToViewDepth[2] / (fDeviceDepth * fDeviceToViewDepth[1] - fDeviceToViewDepth[0]); +} + +FfxFloat32 LoadInputDepth(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) + return r_depth[iPxPos]; +#else + return 0.f; +#endif +} + +FFX_MIN16_F LoadReactiveMask(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) + return r_reactive_mask[iPxPos]; +#else + return 0.f; +#endif +} + +FfxFloat32x4 GatherReactiveMask(int2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) + return r_reactive_mask.GatherRed(s_LinearClamp, FfxFloat32x2(iPxPos) * reactive_mask_dim_rcp); +#else + return 0.f; +#endif +} + +FFX_MIN16_F LoadTransparencyAndCompositionMask(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) + return r_transparency_and_composition_mask[iPxPos]; +#else + return 0.f; +#endif +} + +FfxFloat32 PreExposure() +{ + return fPreExposure; +} + +FfxFloat32x3 LoadInputColor(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) + return r_input_color_jittered[iPxPos].rgb / PreExposure(); +#else + return 0; +#endif +} + +FfxFloat32x3 LoadInputColorWithoutPreExposure(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) + return r_input_color_jittered[iPxPos].rgb; +#else + return 0; +#endif +} + +#if FFX_HALF +FfxFloat16x3 LoadPreparedInputColor(FfxInt16x2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) + return r_prepared_input_color[iPxPos].rgb; +#else + return 0.f; +#endif +} +#endif + +FFX_MIN16_F3 LoadPreparedInputColor(int2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) + return r_prepared_input_color[iPxPos].rgb; +#else + return 0.f; +#endif +} + +FFX_MIN16_F LoadPreparedInputColorLuma(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) + return r_prepared_input_color[iPxPos].a; +#else + return 0.f; +#endif +} + +FfxFloat32x2 LoadInputMotionVector(FFX_MIN16_I2 iPxDilatedMotionVectorPos) +{ +#if defined(FSR2_BIND_SRV_MOTION_VECTORS) || defined(FFX_INTERNAL) + FfxFloat32x2 fSrcMotionVector = r_motion_vectors[iPxDilatedMotionVectorPos].xy; +#else + FfxFloat32x2 fSrcMotionVector = 0.f; +#endif + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale; + +#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= fMotionVectorJitterCancellation; +#endif + + return fUvMotionVector; +} + +FFX_MIN16_F4 LoadHistory(int2 iPxHistory) +{ +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) + return r_internal_upscaled_color[iPxHistory]; +#else + return 0.f; +#endif +} + +FfxFloat32x4 LoadRwInternalUpscaledColorAndWeight(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) + return rw_internal_upscaled_color[iPxPos]; +#else + return 0.f; +#endif +} + +void StoreLumaHistory(FFX_MIN16_I2 iPxPos, FfxFloat32x4 fLumaHistory) +{ +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) + rw_luma_history[iPxPos] = fLumaHistory; +#endif +} + +FfxFloat32x4 LoadRwLumaHistory(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) + return rw_luma_history[iPxPos]; +#else + return 1.f; +#endif +} + +FfxFloat32 LoadLumaStabilityFactor(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) + return r_luma_history[iPxPos].w; +#else + return 0.f; +#endif +} + +FfxFloat32 SampleLumaStabilityFactor(FfxFloat32x2 fUV) +{ +#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) + fUV *= depthclip_uv_scale; + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0).w; +#else + return 0.f; +#endif +} + +void StoreReprojectedHistory(FFX_MIN16_I2 iPxHistory, FFX_MIN16_F4 fHistory) +{ +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) + rw_internal_upscaled_color[iPxHistory] = fHistory; +#endif +} + +void StoreInternalColorAndWeight(FFX_MIN16_I2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) + rw_internal_upscaled_color[iPxPos] = fColorAndWeight; +#endif +} + +void StoreUpscaledOutput(FFX_MIN16_I2 iPxPos, FfxFloat32x3 fColor) +{ +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor * PreExposure(), 1.f); +#endif +} + +//LOCK_LIFETIME_REMAINING == 0 +//Should make LockInitialLifetime() return a const 1.0f later +LOCK_STATUS_T LoadLockStatus(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) + LOCK_STATUS_T fLockStatus = r_lock_status[iPxPos]; + + fLockStatus[0] -= LockInitialLifetime() * 2.0f; + return fLockStatus; +#else + return 0.f; +#endif + + +} + +LOCK_STATUS_T LoadRwLockStatus(int2 iPxPos) +{ +#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) + LOCK_STATUS_T fLockStatus = rw_lock_status[iPxPos]; + + fLockStatus[0] -= LockInitialLifetime() * 2.0f; + + return fLockStatus; +#else + return 0.f; +#endif +} + +void StoreLockStatus(FFX_MIN16_I2 iPxPos, LOCK_STATUS_T fLockstatus) +{ +#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) + fLockstatus[0] += LockInitialLifetime() * 2.0f; + + rw_lock_status[iPxPos] = fLockstatus; +#endif +} + +void StorePreparedInputColor(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN PREPARED_INPUT_COLOR_T fTonemapped) +{ +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) + rw_prepared_input_color[iPxPos] = fTonemapped; +#endif +} + +FfxBoolean IsResponsivePixel(FFX_MIN16_I2 iPxPos) +{ + return FFX_FALSE; //not supported in prototype +} + +FfxFloat32 LoadDepthClip(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) + return r_depth_clip[iPxPos]; +#else + return 0.f; +#endif +} + +FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) +{ +#if defined(FSR2_BIND_SRV_DEPTH_CLIP) || defined(FFX_INTERNAL) + fUV *= depthclip_uv_scale; + return r_depth_clip.SampleLevel(s_LinearClamp, fUV, 0); +#else + return 0.f; +#endif +} + +LOCK_STATUS_T SampleLockStatus(FfxFloat32x2 fUV) +{ +#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) + fUV *= postprocessed_lockstatus_uv_scale; + LOCK_STATUS_T fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); + fLockStatus[0] -= LockInitialLifetime() * 2.0f; + return fLockStatus; +#else + return 0.f; +#endif +} + +void StoreDepthClip(FFX_MIN16_I2 iPxPos, FfxFloat32 fClip) +{ +#if defined(FSR2_BIND_UAV_DEPTH_CLIP) || defined(FFX_INTERNAL) + rw_depth_clip[iPxPos] = fClip; +#endif +} + +FfxFloat32 TanHalfFoV() +{ + return fTanHalfFOV; +} + +FfxFloat32 LoadSceneDepth(FFX_MIN16_I2 iPxInput) +{ +#if defined(FSR2_BIND_SRV_DEPTH) || defined(FFX_INTERNAL) + return r_depth[iPxInput]; +#else + return 0.f; +#endif +} + +FfxFloat32 LoadReconstructedPrevDepth(FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) + return asfloat(r_ReconstructedPrevNearestDepth[iPxPos]); +#else + return 0; +#endif +} + +void StoreReconstructedDepth(FFX_MIN16_I2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = asuint(fDepth); +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) + #if FFX_FSR2_OPTION_INVERTED_DEPTH + InterlockedMax(rw_ReconstructedPrevNearestDepth[iPxSample], uDepth); + #else + InterlockedMin(rw_ReconstructedPrevNearestDepth[iPxSample], uDepth); // min for standard, max for inverted depth + #endif +#endif +} + +void SetReconstructedDepth(FFX_MIN16_I2 iPxSample, const FfxUInt32 uValue) +{ +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) + rw_ReconstructedPrevNearestDepth[iPxSample] = uValue; +#endif +} + +void StoreDilatedDepth(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) + //FfxUInt32 uDepth = f32tof16(fDepth); + rw_dilatedDepth[iPxPos] = fDepth; +#endif +} + +void StoreDilatedMotionVector(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) + rw_dilated_motion_vectors[iPxPos] = fMotionVector; +#endif +} + +FfxFloat32x2 LoadDilatedMotionVector(FFX_MIN16_I2 iPxInput) +{ +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) + return r_dilated_motion_vectors[iPxInput].xy; +#else + return 0.f; +#endif +} + +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +{ +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) + fUV *= depthclip_uv_scale; // TODO: assuming these are (RenderSize() / MaxRenderSize()) + return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); +#else + return 0.f; +#endif +} + +FfxFloat32 LoadDilatedDepth(FFX_MIN16_I2 iPxInput) +{ +#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) + return r_dilatedDepth[iPxInput]; +#else + return 0.f; +#endif +} + +FfxFloat32 Exposure() +{ + // return 1.0f; + #if defined(FSR2_BIND_SRV_EXPOSURE) || defined(FFX_INTERNAL) + FfxFloat32 exposure = r_exposure[FFX_MIN16_I2(0, 0)].x; + #else + FfxFloat32 exposure = 1.f; + #endif + + if (exposure == 0.0f) { + exposure = 1.0f; + } + + return exposure; +} + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} + +#if FFX_HALF +FfxFloat16 SampleLanczos2Weight(FfxFloat16 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat16x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} +#endif + +FFX_MIN16_F SampleUpsampleMaximumBias(FFX_MIN16_F2 uv) +{ +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) + // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. + return FFX_MIN16_F(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); +#else + return 0.f; +#endif +} + +FFX_MIN16_F LoadReactiveMax(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_REACTIVE_MAX) || defined(FFX_INTERNAL) + return r_reactive_max[iPxPos]; +#else + return 0.f; +#endif +} + +void StoreReactiveMax(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F fReactiveMax) +{ +#if defined(FSR2_BIND_UAV_REACTIVE_MASK_MAX) || defined(FFX_INTERNAL) + rw_reactive_max[iPxPos] = fReactiveMax; +#endif +} + +#endif // #if defined(FFX_GPU) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc.meta b/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc.meta new file mode 100644 index 0000000..18ecc11 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_callbacks_hlsl.cginc.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 8401ac79c85ddc64caeed4d788f7e852 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_common.cginc b/Assets/Shaders/FSR2/ffx_fsr2_common.cginc new file mode 100644 index 0000000..7be6631 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_common.cginc @@ -0,0 +1,356 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#if !defined(FFX_FSR2_COMMON_H) +#define FFX_FSR2_COMMON_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +//Locks +#define LOCK_LIFETIME_REMAINING 0 +#define LOCK_TEMPORAL_LUMA 1 +#define LOCK_TRUST 2 +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#if defined(FFX_GPU) +FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f; +FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1e-03f; +FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; +FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; + +// treat vector truncation warnings as errors +#pragma warning(error: 3206) + +// suppress warnings +#pragma warning(disable: 3205) // conversion from larger type to smaller +#pragma warning(disable: 3571) // in ffxPow(f, e), f could be negative + +// Reconstructed depth usage +FFX_STATIC const FfxFloat32 reconstructedDepthBilinearWeightThreshold = 0.05f; + +// Accumulation +FFX_STATIC const FfxFloat32 averageLanczosWeightPerFrame = 0.74f; // Average lanczos weight for jitter accumulated samples +FFX_STATIC const FfxFloat32 accumulationMaxOnMotion = 4.0f; + +// Auto exposure +FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f; + +struct LockState +{ + FfxBoolean NewLock; //Set for both unique new and re-locked new + FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock) +}; + +FFX_MIN16_F GetNormalizedRemainingLockLifetime(FFX_MIN16_F3 fLockStatus) +{ + const FfxFloat32 fTrust = fLockStatus[LOCK_TRUST]; + + return FFX_MIN16_F(((ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - LockInitialLifetime()) / LockInitialLifetime())) * fTrust); +} + +LOCK_STATUS_T CreateNewLockSample() +{ + LOCK_STATUS_T fLockStatus = LOCK_STATUS_T(0, 0, 0); + + fLockStatus[LOCK_TRUST] = LOCK_STATUS_F1(1); + + return fLockStatus; +} + +void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F3 fLockStatus) +{ + fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); +} + +#define SPLIT_LEFT 0 +#define SPLIT_RIGHT 1 +#ifndef SPLIT_SHADER +#define SPLIT_SHADER SPLIT_RIGHT +#endif + +#if FFX_HALF + +#define UPSAMPLE_F FfxFloat16 +#define UPSAMPLE_F2 FfxFloat16x2 +#define UPSAMPLE_F3 FfxFloat16x3 +#define UPSAMPLE_F4 FfxFloat16x4 +#define UPSAMPLE_I FfxInt16 +#define UPSAMPLE_I2 FfxInt16x2 +#define UPSAMPLE_I3 FfxInt16x3 +#define UPSAMPLE_I4 FfxInt16x4 +#define UPSAMPLE_U FfxUInt16 +#define UPSAMPLE_U2 FfxUInt16x2 +#define UPSAMPLE_U3 FfxUInt16x3 +#define UPSAMPLE_U4 FfxUInt16x4 +#define UPSAMPLE_F2_BROADCAST(X) FFX_BROADCAST_MIN_FLOAT16X2(X) +#define UPSAMPLE_F3_BROADCAST(X) FFX_BROADCAST_MIN_FLOAT16X3(X) +#define UPSAMPLE_F4_BROADCAST(X) FFX_BROADCAST_MIN_FLOAT16X4(X) +#define UPSAMPLE_I2_BROADCAST(X) FFX_BROADCAST_MIN_INT16X2(X) +#define UPSAMPLE_I3_BROADCAST(X) FFX_BROADCAST_MIN_INT16X3(X) +#define UPSAMPLE_I4_BROADCAST(X) FFX_BROADCAST_MIN_INT16X4(X) +#define UPSAMPLE_U2_BROADCAST(X) FFX_BROADCAST_MIN_UINT16X2(X) +#define UPSAMPLE_U3_BROADCAST(X) FFX_BROADCAST_MIN_UINT16X3(X) +#define UPSAMPLE_U4_BROADCAST(X) FFX_BROADCAST_MIN_UINT16X4(X) + +#else //FFX_HALF + +#define UPSAMPLE_F FfxFloat32 +#define UPSAMPLE_F2 FfxFloat32x2 +#define UPSAMPLE_F3 FfxFloat32x3 +#define UPSAMPLE_F4 FfxFloat32x4 +#define UPSAMPLE_I FfxInt32 +#define UPSAMPLE_I2 FfxInt32x2 +#define UPSAMPLE_I3 FfxInt32x3 +#define UPSAMPLE_I4 FfxInt32x4 +#define UPSAMPLE_U FfxUInt32 +#define UPSAMPLE_U2 FfxUInt32x2 +#define UPSAMPLE_U3 FfxUInt32x3 +#define UPSAMPLE_U4 FfxUInt32x4 +#define UPSAMPLE_F2_BROADCAST(X) FFX_BROADCAST_FLOAT32X2(X) +#define UPSAMPLE_F3_BROADCAST(X) FFX_BROADCAST_FLOAT32X3(X) +#define UPSAMPLE_F4_BROADCAST(X) FFX_BROADCAST_FLOAT32X4(X) +#define UPSAMPLE_I2_BROADCAST(X) FFX_BROADCAST_INT32X2(X) +#define UPSAMPLE_I3_BROADCAST(X) FFX_BROADCAST_INT32X3(X) +#define UPSAMPLE_I4_BROADCAST(X) FFX_BROADCAST_INT32X4(X) +#define UPSAMPLE_U2_BROADCAST(X) FFX_BROADCAST_UINT32X2(X) +#define UPSAMPLE_U3_BROADCAST(X) FFX_BROADCAST_UINT32X3(X) +#define UPSAMPLE_U4_BROADCAST(X) FFX_BROADCAST_UINT32X4(X) + +#endif //FFX_HALF + +struct RectificationBoxData +{ + UPSAMPLE_F3 boxCenter; + UPSAMPLE_F3 boxVec; + UPSAMPLE_F3 aabbMin; + UPSAMPLE_F3 aabbMax; +}; + +struct RectificationBox +{ + RectificationBoxData data_; + UPSAMPLE_F fBoxCenterWeight; +}; + +void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const UPSAMPLE_F3 initialColorSample) +{ + rectificationBox.fBoxCenterWeight = UPSAMPLE_F(0.0); + + rectificationBox.data_.boxCenter = UPSAMPLE_F3_BROADCAST(0); + rectificationBox.data_.boxVec = UPSAMPLE_F3_BROADCAST(0); + rectificationBox.data_.aabbMin = initialColorSample; + rectificationBox.data_.aabbMax = initialColorSample; +} + +void RectificationBoxAddSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const UPSAMPLE_F3 colorSample, const UPSAMPLE_F fSampleWeight) +{ + rectificationBox.data_.aabbMin = ffxMin(rectificationBox.data_.aabbMin, colorSample); + rectificationBox.data_.aabbMax = ffxMax(rectificationBox.data_.aabbMax, colorSample); + UPSAMPLE_F3 weightedSample = colorSample * fSampleWeight; + rectificationBox.data_.boxCenter += weightedSample; + rectificationBox.data_.boxVec += colorSample * weightedSample; + rectificationBox.fBoxCenterWeight += fSampleWeight; +} + +void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +{ + rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > UPSAMPLE_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : UPSAMPLE_F(1.f)); + rectificationBox.data_.boxCenter /= rectificationBox.fBoxCenterWeight; + rectificationBox.data_.boxVec /= rectificationBox.fBoxCenterWeight; + UPSAMPLE_F3 stdDev = sqrt(abs(rectificationBox.data_.boxVec - rectificationBox.data_.boxCenter * rectificationBox.data_.boxCenter)); + rectificationBox.data_.boxVec = stdDev; +} + +RectificationBoxData RectificationBoxGetData(FFX_PARAMETER_INOUT RectificationBox rectificationBox) +{ + return rectificationBox.data_; +} + +FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) +{ + return (all(FFX_NOT_EQUAL(v, FFX_BROADCAST_FLOAT32X3(0)))) ? (FFX_BROADCAST_FLOAT32X3(1.0f) / v) : FFX_BROADCAST_FLOAT32X3(0.0f); +} + +FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) +{ + const FfxFloat32 m = ffxMax(v0, v1); + return m != 0 ? ffxMin(v0, v1) / m : 0; +} + +#if FFX_HALF +FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1) +{ + const FFX_MIN16_F m = ffxMax(v0, v1); + return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0); +} +#endif + +FfxFloat32 MaxDividedByMin(const FfxFloat32 v0, const FfxFloat32 v1) +{ + const FfxFloat32 m = ffxMin(v0, v1); + return m != 0 ? ffxMax(v0, v1) / m : 0; +} + +FFX_MIN16_F3 RGBToYCoCg_16(FFX_MIN16_F3 fRgb) +{ + FFX_MIN16_F3 fYCoCg; + fYCoCg.x = dot(fRgb.rgb, FFX_MIN16_F3(+0.25f, +0.50f, +0.25f)); + fYCoCg.y = dot(fRgb.rgb, FFX_MIN16_F3(+0.50f, +0.00f, -0.50f)); + fYCoCg.z = dot(fRgb.rgb, FFX_MIN16_F3(-0.25f, +0.50f, -0.25f)); + return fYCoCg; +} + +FFX_MIN16_F3 RGBToYCoCg_V2_16(FFX_MIN16_F3 fRgb) +{ + FFX_MIN16_F a = fRgb.g * FFX_MIN16_F(0.5f); + FFX_MIN16_F b = (fRgb.r + fRgb.b) * FFX_MIN16_F(0.25f); + FFX_MIN16_F3 fYCoCg; + fYCoCg.x = a + b; + fYCoCg.y = (fRgb.r - fRgb.b) * FFX_MIN16_F(0.5f); + fYCoCg.z = a - b; + return fYCoCg; +} + +FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg) +{ + FfxFloat32x3 fRgb; + FfxFloat32 tmp = fYCoCg.x - fYCoCg.z / 2.0; + fRgb.g = fYCoCg.z + tmp; + fRgb.b = tmp - fYCoCg.y / 2.0; + fRgb.r = fRgb.b + fYCoCg.y; + return fRgb; +} +#if FFX_HALF +FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg) +{ + FFX_MIN16_F3 fRgb; + FFX_MIN16_F tmp = fYCoCg.x - fYCoCg.z * FFX_MIN16_F(0.5f); + fRgb.g = fYCoCg.z + tmp; + fRgb.b = tmp - fYCoCg.y * FFX_MIN16_F(0.5f); + fRgb.r = fRgb.b + fYCoCg.y; + return fRgb; +} +#endif + +FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb) +{ + FfxFloat32x3 fYCoCg; + fYCoCg.y = fRgb.r - fRgb.b; + FfxFloat32 tmp = fRgb.b + fYCoCg.y / 2.0; + fYCoCg.z = fRgb.g - tmp; + fYCoCg.x = tmp + fYCoCg.z / 2.0; + return fYCoCg; +} +#if FFX_HALF +FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb) +{ + FFX_MIN16_F3 fYCoCg; + fYCoCg.y = fRgb.r - fRgb.b; + FFX_MIN16_F tmp = fRgb.b + fYCoCg.y * FFX_MIN16_F(0.5f); + fYCoCg.z = fRgb.g - tmp; + fYCoCg.x = tmp + fYCoCg.z * FFX_MIN16_F(0.5f); + return fYCoCg; +} +#endif + +FfxFloat32x3 RGBToYCoCg_V2(FfxFloat32x3 fRgb) +{ + FfxFloat32 a = fRgb.g * 0.5f; + FfxFloat32 b = (fRgb.r + fRgb.b) * 0.25f; + FfxFloat32x3 fYCoCg; + fYCoCg.x = a + b; + fYCoCg.y = (fRgb.r - fRgb.b) * 0.5f; + fYCoCg.z = a - b; + return fYCoCg; +} + +FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb) +{ + return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); +} + +FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb) +{ + FfxFloat32 fLuminance = RGBToLuma(fLinearRgb); + + FfxFloat32 fPercievedLuminance = 0; + if (fLuminance <= 216.0f / 24389.0f) { + fPercievedLuminance = fLuminance * (24389.0f / 27.0f); + } else { + fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; + } + + return fPercievedLuminance * 0.01f; +} + + +FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; +} + +FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb) +{ + return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} + +#if FFX_HALF +FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb) +{ + return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx; +} + +FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb) +{ + return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; +} + +FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) +{ + return clamp(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1)); +} +#endif + +FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + return clamp(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1)); +} + +FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size) +{ + return all(FFX_GREATER_THAN_EQUAL(pos, FFX_BROADCAST_MIN_FLOAT16X2(0))) && all(FFX_LESS_THAN(pos, size)); +} + +FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg) +{ + Lavg = exp(Lavg); + + const FfxFloat32 S = 100.0f; //ISO arithmetic speed + const FfxFloat32 K = 12.5f; + FfxFloat32 ExposureISO100 = log2((Lavg * S) / K); + + const FfxFloat32 q = 0.65f; + FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100); + + return 1 / Lmax; +} +#endif // #if defined(FFX_GPU) + +#endif //!defined(FFX_FSR2_COMMON_H) \ No newline at end of file diff --git a/Assets/Shaders/FSR2/ffx_fsr2_common.cginc.meta b/Assets/Shaders/FSR2/ffx_fsr2_common.cginc.meta new file mode 100644 index 0000000..29c7b3d --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_common.cginc.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 7dbd8b5acecb7454f9f68f6c8a331efb +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h new file mode 100644 index 0000000..d5bbbcf --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h @@ -0,0 +1,188 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) +{ + FfxFloat32x3 fRgb = LoadInputColor(FfxInt32x2(tex)); + + FFX_STATIC const FfxFloat32x3 rgb2y = FfxFloat32x3(0.2126, 0.7152, 0.0722); + + //compute log luma + const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, dot(rgb2y, fRgb))); + + // Make sure out of screen pixels contribute no value to the end result + const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f; + + return FfxFloat32x4(result, 0, 0, 0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return SPD_LoadMipmap5(tex); +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + SPD_SetMipmap(pix, index, outValue.r); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) + { + FfxFloat32 prev = SPD_LoadExposureBuffer().y; + FfxUInt32x2 renderSize = SPD_RenderSize(); + FfxFloat32 result = outValue.r / (renderSize.x * renderSize.y); + + if (prev < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + FfxFloat32 rate = 1.0f; + result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); + } + FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result); + SPD_SetExposureBuffer(spdOutput); + } + } +} + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3); +} +#endif + +// define fetch and store functions Packed +#if FFX_HALF +#error Callback must be implemented + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]); +} +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]); +} +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ + if (index == LumaMipLevelToUse() || index == 5) + { + imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value); + return; + } + imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value); +} +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter); +} +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0; +} +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "ffx_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} \ No newline at end of file diff --git a/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h.meta new file mode 100644 index 0000000..93cc535 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 27eaecaa70cc09b4c97f68ba43e40a84 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl new file mode 100644 index 0000000..8400af2 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl @@ -0,0 +1,164 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define FSR2_BIND_SRV_INPUT_COLOR 0 +#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 0 +#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 1 +#define FSR2_BIND_UAV_EXPOSURE_MIP_5 2 +#define FSR2_BIND_UAV_EXPOSURE 3 +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_SPD 1 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" + +#if defined(FSR2_BIND_CB_SPD) + cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { + + uint mips; + uint numWorkGroups; + uint2 workGroupOffset; + uint2 renderSize; + }; + + uint MipCount() + { + return mips; + } + + uint NumWorkGroups() + { + return numWorkGroups; + } + + uint2 WorkGroupOffset() + { + return workGroupOffset; + } + + uint2 SPD_RenderSize() + { + return renderSize; + } +#else + uint MipCount() + { + return 0; + } + + uint NumWorkGroups() + { + return 0; + } + + uint2 WorkGroupOffset() + { + return uint2(0, 0); + } + + uint2 SPD_RenderSize() + { + return uint2(0, 0); + } +#endif + + +float2 SPD_LoadExposureBuffer() +{ +#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) + return rw_exposure[min16int2(0,0)]; +#else + return 0; +#endif +} + +void SPD_SetExposureBuffer(float2 value) +{ +#if defined(FSR2_BIND_UAV_EXPOSURE) || defined(FFX_INTERNAL) + rw_exposure[min16int2(0,0)] = value; +#endif +} + +float4 SPD_LoadMipmap5(min16int2 iPxPos) +{ +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) + return float4(rw_img_mip_5[iPxPos], 0, 0, 0); +#else + return 0; +#endif +} + +void SPD_SetMipmap(min16int2 iPxPos, int slice, float value) +{ + switch (slice) + { +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) + case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: + rw_img_mip_shading_change[iPxPos] = value; + break; +#endif +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) + case 5: + rw_img_mip_5[iPxPos] = value; + break; +#endif + default: + // avoid flattened side effect +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) || defined(FFX_INTERNAL) + rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; +#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) || defined(FFX_INTERNAL) + rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos]; +#endif + break; + } +} + +void SPD_IncreaseAtomicCounter(inout uint spdCounter) +{ + InterlockedAdd(rw_spd_global_atomic[min16int2(0,0)], 1, spdCounter); +} + +void SPD_ResetAtomicCounter() +{ + rw_spd_global_atomic[min16int2(0,0)] = 0; +} + +#include "ffx_fsr2_compute_luminance_pyramid.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ + ComputeAutoExposure(WorkGroupId, LocalThreadIndex); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta new file mode 100644 index 0000000..369bdbb --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_compute_luminance_pyramid_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 11a5a62864432d541ac1330a97add861 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h new file mode 100644 index 0000000..b44cc59 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h @@ -0,0 +1,93 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; + +FfxFloat32 ComputeSampleDepthClip(FFX_MIN16_I2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace) +{ + FfxFloat32 fPrevNearestDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(fPreviousDepth)); + + // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion" + // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4 + // TODO: check intention and improve, some banding visible + const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f; + FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + // WARNING: Ksep only works with reversed-z with infinite projection. + const FfxFloat32 Ksep = 1.37e-05f; + FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth; + FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + + FfxFloat32 fDepthClipFactor = (fDepthDiff > 0) ? ffxSaturate(fRequiredDepthSeparation / fDepthDiff) : 1.0f; + +#ifdef _DEBUG + rw_debug_out[iPxSamplePos] = FfxFloat32x4(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace, fDepthDiff, fDepthClipFactor); +#endif + + return fPreviousDepthBilinearWeight * fDepthClipFactor * DepthClipBaseScale; +} + +FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthViewSpace) +{ + FfxFloat32x2 fPxSample = fUvSample * RenderSize() - 0.5f; + FFX_MIN16_I2 iPxSample = FFX_MIN16_I2(floor(fPxSample)); + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); + + const FfxFloat32 fBilinearWeights[2][2] = { + { + (1 - fPxFrac.x) * (1 - fPxFrac.y), + (fPxFrac.x) * (1 - fPxFrac.y) + }, + { + (1 - fPxFrac.x) * (fPxFrac.y), + (fPxFrac.x) * (fPxFrac.y) + } + }; + + FfxFloat32 fDepth = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + for (FfxInt32 y = 0; y <= 1; ++y) { + for (FfxInt32 x = 0; x <= 1; ++x) { + FFX_MIN16_I2 iSamplePos = iPxSample + FFX_MIN16_I2(x, y); + if (IsOnScreen(iSamplePos, FFX_MIN16_I2(RenderSize()))) { + FfxFloat32 fBilinearWeight = fBilinearWeights[y][x]; + if (fBilinearWeight > reconstructedDepthBilinearWeightThreshold) { + fDepth += ComputeSampleDepthClip(iSamplePos, LoadReconstructedPrevDepth(iSamplePos), fBilinearWeight, fCurrentDepthViewSpace); + fWeightSum += fBilinearWeight; + } + } + } + } + + return (fWeightSum > 0) ? fDepth / fWeightSum : DepthClipBaseScale; +} + +void DepthClip(FFX_MIN16_I2 iPxPos) +{ + FfxFloat32x2 fDepthUv = (FfxFloat32x2(iPxPos) + 0.5f) / RenderSize(); + FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector; + FfxFloat32 fCurrentDepthViewSpace = abs(ConvertFromDeviceDepthToViewSpace(LoadDilatedDepth(iPxPos))); + + FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fCurrentDepthViewSpace); + + StoreDepthClip(iPxPos, fDepthClip); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h.meta new file mode 100644 index 0000000..fbd73d0 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: f699007ec4275f749bd1370770c71acf +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl new file mode 100644 index 0000000..d95d2a7 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl @@ -0,0 +1,63 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 3 +// SRV 7 : FSR2_ReconstructedPrevNearestDepth : r_ReconstructedPrevNearestDepth +// SRV 8 : FSR2_DilatedVelocity : r_dilated_motion_vectors +// SRV 9 : FSR2_DilatedDepth : r_dilatedDepth +// UAV 12 : FSR2_DepthClip : rw_depth_clip +// CB 0 : cbFSR2 + +#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR2_BIND_SRV_DILATED_DEPTH 2 +#define FSR2_BIND_UAV_DEPTH_CLIP 0 +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" +#include "ffx_fsr2_sample.h" +#include "ffx_fsr2_depth_clip.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS( + min16int2 iGroupId : SV_GroupID, + min16int2 iDispatchThreadId : SV_DispatchThreadID, + min16int2 iGroupThreadId : SV_GroupThreadID, + int iGroupIndex : SV_GroupIndex) +{ + DepthClip(iDispatchThreadId); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl.meta new file mode 100644 index 0000000..af3c305 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_depth_clip_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: a0b2885873743f64aa80c07ced37addf +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_lock.h b/Assets/Shaders/FSR2/ffx_fsr2_lock.h new file mode 100644 index 0000000..24323df --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_lock.h @@ -0,0 +1,171 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +FfxFloat32 GetLuma(FFX_MIN16_I2 pos) +{ + //add some bias to avoid locking dark areas + return FfxFloat32(LoadPreparedInputColorLuma(pos)); +} + +FfxFloat32 ComputeThinFeatureConfidence(FFX_MIN16_I2 pos) +{ + const FfxInt32 RADIUS = 1; + + FfxFloat32 fNucleus = GetLuma(pos); + + FfxFloat32 similar_threshold = 1.05f; + FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX; + FfxFloat32 dissimilarLumaMax = 0; + + /* + 0 1 2 + 3 4 5 + 6 7 8 + */ + + #define SETBIT(x) (1U << x) + + FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar + + const FfxUInt32 rejectionMasks[4] = { + SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left + SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right + SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left + SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right + }; + + FfxInt32 idx = 0; + FFX_UNROLL + for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) { + FFX_UNROLL + for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) { + if (x == 0 && y == 0) continue; + + FFX_MIN16_I2 samplePos = ClampLoad(pos, FFX_MIN16_I2(x, y), FFX_MIN16_I2(RenderSize())); + + FfxFloat32 sampleLuma = GetLuma(samplePos); + FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus); + + if (difference > 0 && (difference < similar_threshold)) { + mask |= SETBIT(idx); + } else { + dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma); + dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma); + } + } + } + + FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin; + + if (FFX_FALSE == isRidge) { + + return 0; + } + + FFX_UNROLL + for (FfxInt32 i = 0; i < 4; i++) { + + if ((mask & rejectionMasks[i]) == rejectionMasks[i]) { + return 0; + } + } + + return 1; +} + +FFX_STATIC FfxBoolean s_bLockUpdated = FFX_FALSE; + +LOCK_STATUS_T ComputeLockStatus(FFX_MIN16_I2 iPxLrPos, LOCK_STATUS_T fLockStatus) +{ + FfxFloat32 fConfidenceOfThinFeature = ComputeThinFeatureConfidence(iPxLrPos); + + s_bLockUpdated = FFX_FALSE; + if (fConfidenceOfThinFeature > 0.0f) + { + //put to negative on new lock + fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] == LOCK_STATUS_F1(0.0f)) ? LOCK_STATUS_F1(-LockInitialLifetime()) : LOCK_STATUS_F1(-(LockInitialLifetime() * 2)); + + s_bLockUpdated = FFX_TRUE; + } + + return fLockStatus; +} + +void ComputeLock(FFX_MIN16_I2 iPxLrPos) +{ + FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); + FfxFloat32x2 fHrPos = floor(fLrPosInHr) + 0.5; + FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(fHrPos); + + LOCK_STATUS_T fLockStatus = ComputeLockStatus(iPxLrPos, LoadLockStatus(iPxHrPos)); + + if ((s_bLockUpdated)) { + StoreLockStatus(iPxHrPos, fLockStatus); + } +} + +FFX_GROUPSHARED FfxFloat32 gs_ReactiveMask[(8 + 4) * (8 + 4)]; + +void StoreReactiveMaskToLDS(FfxUInt32x2 coord, FfxFloat32x2 value) +{ + FfxUInt32 baseIdx = coord.y * 12 + coord.x; + gs_ReactiveMask[baseIdx] = value.x; + gs_ReactiveMask[baseIdx + 1] = value.y; +} + +FfxFloat32 LoadReactiveMaskFromLDS(FfxUInt32x2 coord) +{ + return gs_ReactiveMask[coord.y * 12 + coord.x]; +} + +void PreProcessReactiveMask(FFX_MIN16_I2 iPxLrPos, FfxUInt32x2 groupId, FfxUInt32x2 groupThreadId) +{ +#if OPT_PRECOMPUTE_REACTIVE_MAX && !OPT_USE_EVAL_ACCUMULATION_REACTIVENESS + + if (all(FFX_LESS_THAN(groupThreadId, FFX_BROADCAST_UINT32X2(6)))) { + + FfxInt32x2 iPos = FfxInt32x2(groupId << 3) + FfxInt32x2(groupThreadId << 1) - 1; + FfxFloat32x4 fReactiveMask2x2 = GatherReactiveMask(iPos).wzxy; + + StoreReactiveMaskToLDS(groupThreadId << 1, fReactiveMask2x2.xy); + StoreReactiveMaskToLDS((groupThreadId << 1) + FfxInt32x2(0, 1), fReactiveMask2x2.zw); + } + + FFX_GROUP_MEMORY_BARRIER(); + + FfxFloat32 fReactiveMax = 0.0f; + + for (FfxUInt32 row = 0; row < 4; row++) { + for (FfxUInt32 col = 0; col < 4; col++) { + const FfxUInt32x2 localOffset = groupThreadId + FfxUInt32x2(col, row); + const FfxBoolean bOutOfRenderBounds = any(FFX_GREATER_THAN_EQUAL((FfxInt32x2(groupId << 3) + FfxInt32x2(localOffset)), RenderSize())); + fReactiveMax = bOutOfRenderBounds ? fReactiveMax : ffxMax(fReactiveMax, LoadReactiveMaskFromLDS(localOffset)); + } + } + + // Threshold reactive value + fReactiveMax = fReactiveMax > 0.8f ? fReactiveMax : 0.0f; + + StoreReactiveMax(iPxLrPos, FFX_MIN16_F(fReactiveMax)); + +#endif //OPT_PRECOMPUTE_REACTIVE_MAX && !OPT_USE_EVAL_ACCUMULATION_REACTIVENESS +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_lock.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_lock.h.meta new file mode 100644 index 0000000..2b78048 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_lock.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 752646c17a9b5a046b6c8abf0436bc21 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl new file mode 100644 index 0000000..2f9c20e --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl @@ -0,0 +1,66 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 4 +// SRV 5 : m_UpscaleReactive : r_reactive_mask +// SRV 11 : FSR2_LockStatus2 : r_lock_status +// SRV 13 : FSR2_PreparedInputColor : r_prepared_input_color +// UAV 11 : FSR2_LockStatus1 : rw_lock_status +// UAV 27 : FSR2_ReactiveMaskMax : rw_reactive_max +// CB 0 : cbFSR2 +// CB 1 : FSR2DispatchOffsets + +#define FSR2_BIND_SRV_REACTIVE_MASK 0 +#define FSR2_BIND_SRV_LOCK_STATUS 1 +#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 2 +#define FSR2_BIND_UAV_LOCK_STATUS 0 +#define FSR2_BIND_UAV_REACTIVE_MASK_MAX 1 +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" +#include "ffx_fsr2_sample.h" +#include "ffx_fsr2_lock.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID) +{ + uint2 uDispatchThreadId = uGroupId * uint2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + ComputeLock(uDispatchThreadId); + + PreProcessReactiveMask(uDispatchThreadId, uGroupId, uGroupThreadId); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl.meta new file mode 100644 index 0000000..1f8ab48 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_lock_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 9ebb55b63a3165640bb00e58ba134e3a +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h b/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h new file mode 100644 index 0000000..06c5495 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h @@ -0,0 +1,85 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H +#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H + +FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample) +{ + return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0); +} + +DeclareCustomFetchBilinearSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma) +DeclareCustomTextureSample(ShadingChangeLumaSample, Bilinear, FetchShadingChangeLumaSamples) + +FFX_MIN16_F GetShadingChangeLuma(FfxFloat32x2 fUvCoord) +{ + // const FfxFloat32 fShadingChangeLuma = exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()) * LumaMipRcp()); + const FFX_MIN16_F fShadingChangeLuma = FFX_MIN16_F(exp(SampleMipLuma(fUvCoord, LumaMipLevelToUse()) * FFX_MIN16_F(LumaMipRcp()))); + return fShadingChangeLuma; +} + +LockState GetLockState(LOCK_STATUS_T fLockStatus) +{ + LockState state = { FFX_FALSE, FFX_FALSE }; + + //Check if this is a new or refreshed lock + state.NewLock = fLockStatus[LOCK_LIFETIME_REMAINING] < LOCK_STATUS_F1(0.0f); + + //For a non-refreshed lock, the lifetime is set to LockInitialLifetime() + state.WasLockedPrevFrame = fLockStatus[LOCK_TRUST] != LOCK_STATUS_F1(0.0f); + + return state; +} + +LockState PostProcessLockStatus(FFX_MIN16_I2 iPxHrPos, FFX_PARAMETER_IN FfxFloat32x2 fLrUvJittered, FFX_PARAMETER_IN FFX_MIN16_F fDepthClipFactor, FFX_PARAMETER_IN FfxFloat32 fHrVelocity, + FFX_PARAMETER_INOUT FfxFloat32 fAccumulationTotalWeight, FFX_PARAMETER_INOUT LOCK_STATUS_T fLockStatus, FFX_PARAMETER_OUT FFX_MIN16_F fLuminanceDiff) { + + const LockState state = GetLockState(fLockStatus); + + fLockStatus[LOCK_LIFETIME_REMAINING] = abs(fLockStatus[LOCK_LIFETIME_REMAINING]); + + FFX_MIN16_F fShadingChangeLuma = GetShadingChangeLuma(fLrUvJittered); + + //init temporal shading change factor, init to -1 or so in reproject to know if "true new"? + fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == LOCK_STATUS_F1(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA]; + + FFX_MIN16_F fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA]; + fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], LOCK_STATUS_F1(fShadingChangeLuma), LOCK_STATUS_F1(0.5f)); + fLuminanceDiff = FFX_MIN16_F(1) - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma); + + if (fLuminanceDiff > FFX_MIN16_F(0.2f)) { + KillLock(fLockStatus); + } + + if (!state.NewLock && fLockStatus[LOCK_LIFETIME_REMAINING] >= LOCK_STATUS_F1(0)) + { + const FFX_MIN16_F depthClipThreshold = FFX_MIN16_F(0.99f); + if (fDepthClipFactor < depthClipThreshold) + { + KillLock(fLockStatus); + } + } + + return state; +} + +#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h.meta new file mode 100644 index 0000000..564219c --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_postprocess_lock_status.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 56fcb16f22e48dd47a24bd70fc4dd53b +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h new file mode 100644 index 0000000..b9772b6 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h @@ -0,0 +1,83 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//TODO: Move to common location & share with Accumulate +void ClearResourcesForNextFrame(in FFX_MIN16_I2 iPxHrPos) +{ + if (all(FFX_LESS_THAN(iPxHrPos, FFX_MIN16_I2(RenderSize())))) + { +#if FFX_FSR2_OPTION_INVERTED_DEPTH + const FfxUInt32 farZ = 0x0; +#else + const FfxUInt32 farZ = 0x3f800000; +#endif + SetReconstructedDepth(iPxHrPos, farZ); + } +} + +void ComputeLumaStabilityFactor(FFX_MIN16_I2 iPxLrPos, FfxFloat32 fCurrentFrameLuma) +{ + FfxFloat32x4 fCurrentFrameLumaHistory = LoadRwLumaHistory(iPxLrPos); + + fCurrentFrameLumaHistory.a = FfxFloat32(0); + + if (FrameIndex() > 3) { + FfxFloat32 fDiffs0 = MinDividedByMax(fCurrentFrameLumaHistory[2], fCurrentFrameLuma); + FfxFloat32 fDiffs1 = ffxMax(MinDividedByMax(fCurrentFrameLumaHistory[0], fCurrentFrameLuma), MinDividedByMax(fCurrentFrameLumaHistory[1], fCurrentFrameLuma)); + + fCurrentFrameLumaHistory.a = ffxSaturate(fDiffs1 - fDiffs0); + } + + //move history + fCurrentFrameLumaHistory[0] = fCurrentFrameLumaHistory[1]; + fCurrentFrameLumaHistory[1] = fCurrentFrameLumaHistory[2]; + fCurrentFrameLumaHistory[2] = fCurrentFrameLuma; + + StoreLumaHistory(iPxLrPos, fCurrentFrameLumaHistory); +} + +void PrepareInputColor(FFX_MIN16_I2 iPxLrPos) +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + + FfxFloat32x3 fRgb = ffxMax(FFX_BROADCAST_FLOAT32X3(0), LoadInputColor(iPxLrPos)); + + fRgb *= Exposure(); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + // Tonemap color, used in lockstatus and luma stability computations + fRgb = Tonemap(fRgb); +#endif + + PREPARED_INPUT_COLOR_T fYCoCg; + + fYCoCg.xyz = PREPARED_INPUT_COLOR_F3(RGBToYCoCg(fRgb)); + + const FfxFloat32 fPerceivedLuma = RGBToPerceivedLuma(fRgb); + ComputeLumaStabilityFactor(iPxLrPos, fPerceivedLuma); + + //compute luma used to lock pixels, if used elsewhere the ffxPow must be moved! + fYCoCg.w = PREPARED_INPUT_COLOR_F1(ffxPow(fPerceivedLuma, 1.0f / 6.0f)); + + StorePreparedInputColor(iPxLrPos, fYCoCg); + ClearResourcesForNextFrame(iPxLrPos); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h.meta new file mode 100644 index 0000000..9d39ab4 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 5a9511d4524bc6d41ae1c3dc8728eb95 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl new file mode 100644 index 0000000..b8d258a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl @@ -0,0 +1,64 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 1 +// SRV 1 : m_HDR : r_input_color_jittered +// SRV 4 : FSR2_Exposure : r_exposure +// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_ReconstructedPrevNearestDepth +// UAV 13 : FSR2_PreparedInputColor : rw_prepared_input_color +// UAV 14 : FSR2_LumaHistory : rw_luma_history +// CB 0 : cbFSR2 + +#define FSR2_BIND_SRV_INPUT_COLOR 0 +#define FSR2_BIND_SRV_EXPOSURE 1 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 1 +#define FSR2_BIND_UAV_LUMA_HISTORY 2 +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" +#include "ffx_fsr2_prepare_input_color.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS( + uint2 uGroupId : SV_GroupID, + uint2 uDispatchThreadId : SV_DispatchThreadID, + uint2 uGroupThreadId : SV_GroupThreadID, + uint uGroupIndex : SV_GroupIndex +) +{ + PrepareInputColor(uDispatchThreadId); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl.meta new file mode 100644 index 0000000..a66a43a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_prepare_input_color_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: 7aa1ba5d320042e4b9eb1a2f2a8d1295 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_rcas.h b/Assets/Shaders/FSR2/ffx_fsr2_rcas.h new file mode 100644 index 0000000..0429d8f --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_rcas.h @@ -0,0 +1,105 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define GROUP_SIZE 8 + +#define FSR_RCAS_DENOISE 1 + +void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); +} + +#if FFX_HALF + #define FSR_RCAS_H + FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p) + { + FfxFloat32x4 inputSample = LoadRCAS_Input(p); //TODO: fix type + + inputSample.rgb *= Exposure(); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + inputSample.rgb = Tonemap(inputSample.rgb); +#endif // #if FFX_FSR2_OPTION_HDR_COLOR_INPUT + + return FfxFloat16x4(inputSample); + } + void FsrRcasInputH(inout FfxFloat16 r, inout FfxFloat16 g, inout FfxFloat16 b) {} +#else + #define FSR_RCAS_F + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) + { + FfxFloat32x4 inputSample = LoadRCAS_Input(p); + + inputSample.rgb *= Exposure(); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + inputSample.rgb = Tonemap(inputSample.rgb); +#endif + + return inputSample; + } + + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} +#endif // #if FFX_HALF + +#include "ffx_fsr1.h" + + +void CurrFilter(FFX_MIN16_U2 pos) +{ +#if FFX_HALF + FfxFloat16x3 c; + FsrRcasH(c.r, c.g, c.b, pos, RCASConfig()); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + c = InverseTonemap(c); +#endif + + c /= FfxFloat16(Exposure()); + + WriteUpscaledOutput(pos, c); //TODO: fix type +#else + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + c = InverseTonemap(c); +#endif + + c /= Exposure(); + + WriteUpscaledOutput(pos, c); +#endif +} + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_rcas.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_rcas.h.meta new file mode 100644 index 0000000..271ebc1 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_rcas.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 91f9f415097f9c94888afa6b3753b4e9 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl new file mode 100644 index 0000000..ea6b35a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl @@ -0,0 +1,88 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 6 +// SRV 4 : m_Exposure : r_exposure +// SRV 19 : FSR2_InternalUpscaled1 : r_rcas_input +// UAV 18 : DisplayOutput : rw_upscaled_output +// CB 0 : cbFSR2 +// CB 1 : cbRCAS + +#define FSR2_BIND_SRV_EXPOSURE 0 +#define FSR2_BIND_SRV_RCAS_INPUT 1 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 0 +#define FSR2_BIND_CB_FSR2 0 +#define FSR2_BIND_CB_RCAS 1 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" + +//Move to prototype shader! +#if defined(FSR2_BIND_CB_RCAS) + cbuffer cbRCAS : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) + { + uint4 rcasConfig; + }; + + uint4 RCASConfig() + { + return rcasConfig; + } +#else + uint4 RCASConfig() + { + return 0; + } +#endif + +#if FFX_HALF +float4 LoadRCAS_Input(FfxInt16x2 iPxPos) +{ + return r_rcas_input[iPxPos]; +} +#else +float4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ + return r_rcas_input[iPxPos]; +} +#endif + +#include "ffx_fsr2_rcas.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID) +{ + RCAS(LocalThreadId, WorkGroupId, Dtid); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl.meta new file mode 100644 index 0000000..86d564b --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_rcas_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: fdf63c16108ce284bba003f9d3e389c0 +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h new file mode 100644 index 0000000..655fa13 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -0,0 +1,135 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H +#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H + +void ReconstructPrevDepth(FFX_MIN16_I2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FFX_MIN16_I2 iPxDepthSize) +{ + FfxFloat32x2 fDepthUv = (FfxFloat32x2(iPxPos) + 0.5f) / iPxDepthSize; + FfxFloat32x2 fPxPrevPos = (fDepthUv + fMotionVector) * FfxFloat32x2(iPxDepthSize)-0.5f; + FFX_MIN16_I2 iPxPrevPos = FFX_MIN16_I2(floor(fPxPrevPos)); + FfxFloat32x2 fPxFrac = ffxFract(fPxPrevPos); + + const FfxFloat32 bilinearWeights[2][2] = { + { + (1 - fPxFrac.x) * (1 - fPxFrac.y), + (fPxFrac.x) * (1 - fPxFrac.y) + }, + { + (1 - fPxFrac.x) * (fPxFrac.y), + (fPxFrac.x) * (fPxFrac.y) + } + }; + + // Project current depth into previous frame locations. + // Push to all pixels having some contribution if reprojection is using bilinear logic. + for (FfxInt32 y = 0; y <= 1; ++y) { + for (FfxInt32 x = 0; x <= 1; ++x) { + + FFX_MIN16_I2 offset = FFX_MIN16_I2(x, y); + FfxFloat32 w = bilinearWeights[y][x]; + + if (w > reconstructedDepthBilinearWeightThreshold) { + + FFX_MIN16_I2 storePos = iPxPrevPos + offset; + if (IsOnScreen(storePos, iPxDepthSize)) { + StoreReconstructedDepth(storePos, fDepth); + } + } + } + } +} + +void FindNearestDepth(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_I2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FFX_MIN16_I2 fNearestDepthCoord) +{ + const FfxInt32 iSampleCount = 9; + const FFX_MIN16_I2 iSampleOffsets[iSampleCount] = { + FFX_MIN16_I2(+0, +0), + FFX_MIN16_I2(+1, +0), + FFX_MIN16_I2(+0, +1), + FFX_MIN16_I2(+0, -1), + FFX_MIN16_I2(-1, +0), + FFX_MIN16_I2(-1, +1), + FFX_MIN16_I2(+1, +1), + FFX_MIN16_I2(-1, -1), + FFX_MIN16_I2(+1, -1), + }; + + // pull out the depth loads to allow SC to batch them + FfxFloat32 depth[9]; + FfxInt32 iSampleIndex = 0; + FFX_UNROLL + for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FFX_MIN16_I2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + depth[iSampleIndex] = LoadInputDepth(iPos); + } + + // find closest depth + fNearestDepthCoord = iPxPos; + fNearestDepth = depth[0]; + FFX_UNROLL + for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FFX_MIN16_I2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + if (IsOnScreen(iPos, iPxSize)) { + + FfxFloat32 fNdDepth = depth[iSampleIndex]; +#if FFX_FSR2_OPTION_INVERTED_DEPTH + if (fNdDepth > fNearestDepth) { +#else + if (fNdDepth < fNearestDepth) { +#endif + fNearestDepthCoord = iPos; + fNearestDepth = fNdDepth; + } + } + } +} + +void ReconstructPrevDepthAndDilateMotionVectors(FFX_MIN16_I2 iPxLrPos) +{ + FFX_MIN16_I2 iPxLrSize = FFX_MIN16_I2(RenderSize()); + FFX_MIN16_I2 iPxHrSize = FFX_MIN16_I2(DisplaySize()); + + FfxFloat32 fDilatedDepth; + FFX_MIN16_I2 iNearestDepthCoord; + + FindNearestDepth(iPxLrPos, iPxLrSize, fDilatedDepth, iNearestDepthCoord); + +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iNearestDepthCoord); +#else + FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iNearestDepthCoord) + 0.5f - Jitter(); + FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / iPxLrSize) * iPxHrSize; + FfxFloat32x2 fHrPos = floor(fLrPosInHr) + 0.5; + + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(FFX_MIN16_I2(fHrPos)); +#endif + + StoreDilatedDepth(iPxLrPos, fDilatedDepth); + StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector); + + ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, iPxLrSize); +} + +#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta new file mode 100644 index 0000000..8fa69d3 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 4dea78896bcaeff43b67a5f16a56e2ef +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl new file mode 100644 index 0000000..21825cb --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl @@ -0,0 +1,66 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR2 pass 2 +// SRV 2 : m_MotionVector : r_motion_vectors +// SRV 3 : m_depthbuffer : r_depth +// UAV 7 : FSR2_ReconstructedPrevNearestDepth : rw_ReconstructedPrevNearestDepth +// UAV 8 : FSR2_DilatedVelocity : rw_dilated_motion_vectors +// UAV 9 : FSR2_DilatedDepth : rw_dilatedDepth +// CB 0 : cbFSR2 + +#define FSR2_BIND_SRV_MOTION_VECTORS 0 +#define FSR2_BIND_SRV_DEPTH 1 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 1 +#define FSR2_BIND_UAV_DILATED_DEPTH 2 +#define FSR2_BIND_CB_FSR2 0 + +#include "ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr2_common.h" +#include "ffx_fsr2_sample.h" +#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS [numthreads(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT, FFX_FSR2_THREAD_GROUP_DEPTH)] +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_PREFER_WAVE64 +FFX_FSR2_NUM_THREADS +FFX_FSR2_EMBED_ROOTSIG_CONTENT +void CS( + min16int2 iGroupId : SV_GroupID, + min16int2 iDispatchThreadId : SV_DispatchThreadID, + min16int2 iGroupThreadId : SV_GroupThreadID, + int iGroupIndex : SV_GroupIndex +) +{ + ReconstructPrevDepthAndDilateMotionVectors(iDispatchThreadId); +} diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta new file mode 100644 index 0000000..d7a88f2 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reconstruct_previous_depth_pass.hlsl.meta @@ -0,0 +1,7 @@ +fileFormatVersion: 2 +guid: bb5d100e3ed07fa46b20687e0b74726c +ShaderIncludeImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reproject.h b/Assets/Shaders/FSR2/ffx_fsr2_reproject.h new file mode 100644 index 0000000..3fceafd --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reproject.h @@ -0,0 +1,93 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_REPROJECT_H +#define FFX_FSR2_REPROJECT_H + +FFX_MIN16_F4 WrapHistory(FfxInt32x2 iPxSample) +{ + return LoadHistory(iPxSample); +} + +DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory) +DeclareCustomTextureSample(HistorySample, Lanczos2, FetchHistorySamples) + + +FFX_MIN16_F4 WrapLockStatus(FfxInt32x2 iPxSample) +{ + return FFX_MIN16_F4(LoadLockStatus(FFX_MIN16_I2(iPxSample)), 0); +} + +#if 1 +DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples) +#else +DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus) +DeclareCustomTextureSample(LockStatusSample, Lanczos2, FetchLockStatusSamples) +#endif + + + +FfxFloat32x2 GetMotionVector(FFX_MIN16_I2 iPxHrPos, FfxFloat32x2 fHrUv) +{ +#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS + FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); +#else + FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); +#endif + + return fDilatedMotionVector; +} + +void ComputeReprojectedUVs(FfxInt32x2 iPxHrPos, FfxFloat32x2 fMotionVector, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) +{ + FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize(); + fReprojectedHrUv = fHrUv + fMotionVector; + + bIsExistingSample = (fReprojectedHrUv.x >= 0.0f && fReprojectedHrUv.x <= 1.0f) && + (fReprojectedHrUv.y >= 0.0f && fReprojectedHrUv.y <= 1.0f); +} + +void ReprojectHistoryColor(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxFloat32x4 fHistoryColorAndWeight) +{ + fHistoryColorAndWeight = HistorySample(fReprojectedHrUv, DisplaySize()); + fHistoryColorAndWeight.rgb *= Exposure(); + +#if FFX_FSR2_OPTION_HDR_COLOR_INPUT + fHistoryColorAndWeight.rgb = Tonemap(fHistoryColorAndWeight.rgb); +#endif + + fHistoryColorAndWeight.rgb = RGBToYCoCg(fHistoryColorAndWeight.rgb); +} + +void ReprojectHistoryLockStatus(FfxInt32x2 iPxHrPos, FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT LOCK_STATUS_T fReprojectedLockStatus) +{ + // If function is called from Accumulate pass, we need to treat locks differently + LOCK_STATUS_F1 fInPlaceLockLifetime = LoadRwLockStatus(iPxHrPos)[LOCK_LIFETIME_REMAINING]; + + fReprojectedLockStatus = SampleLockStatus(fReprojectedHrUv); + + // Keep lifetime if new lock + if (fInPlaceLockLifetime < 0.0f) { + fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] = fInPlaceLockLifetime; + } +} +#endif //!defined( FFX_FSR2_REPROJECT_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_reproject.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_reproject.h.meta new file mode 100644 index 0000000..ec6b1dc --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_reproject.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 6b8b8387996ecd54aa40df858e3fdcba +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_resources.h b/Assets/Shaders/FSR2/ffx_fsr2_resources.h new file mode 100644 index 0000000..2309351 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_resources.h @@ -0,0 +1,89 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_RESOURCES_H +#define FFX_FSR2_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR 1 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 2 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH 3 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 4 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 5 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 6 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 7 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 8 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH 9 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 10 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS 11 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DEPTH_CLIP 12 +#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR 13 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 +#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 +#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 +#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 +#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT 19 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 20 +#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 21 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 +#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT 26 +#define FFX_FSR2_RESOURCE_IDENTIFIER_REACTIVE_MAX 27 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE 28 // same as FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0 28 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_1 29 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_2 30 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_3 31 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 32 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_5 33 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_6 34 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_7 35 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_8 36 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_9 37 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_10 38 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_11 39 +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12 40 +#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 41 +#define FFX_FSR2_RESOURCE_IDENTIFIER_EXPOSURE 42 + +// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_12] +#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_4 +#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL (FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE) + +#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT 43 + + +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2 0 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS 2 + +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 +#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 +#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR2_RESOURCES_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_resources.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_resources.h.meta new file mode 100644 index 0000000..c71b67a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_resources.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: e9b91583116147d44a72720101257539 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_sample.h b/Assets/Shaders/FSR2/ffx_fsr2_sample.h new file mode 100644 index 0000000..95fa51d --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_sample.h @@ -0,0 +1,494 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_SAMPLE_H +#define FFX_FSR2_SAMPLE_H + +// suppress warnings +#ifdef FFX_HLSL +#pragma warning(disable: 4008) // potentially divide by zero +#endif //FFX_HLSL + +struct FetchedBilinearSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; +}; + +struct FetchedBicubicSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + FfxFloat32x4 fColor20; + FfxFloat32x4 fColor30; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; + FfxFloat32x4 fColor21; + FfxFloat32x4 fColor31; + + FfxFloat32x4 fColor02; + FfxFloat32x4 fColor12; + FfxFloat32x4 fColor22; + FfxFloat32x4 fColor32; + + FfxFloat32x4 fColor03; + FfxFloat32x4 fColor13; + FfxFloat32x4 fColor23; + FfxFloat32x4 fColor33; +}; + +#if FFX_HALF +struct FetchedBicubicSamplesMin16 { + + FfxFloat16x4 fColor00; + FfxFloat16x4 fColor10; + FfxFloat16x4 fColor20; + FfxFloat16x4 fColor30; + + FfxFloat16x4 fColor01; + FfxFloat16x4 fColor11; + FfxFloat16x4 fColor21; + FfxFloat16x4 fColor31; + + FfxFloat16x4 fColor02; + FfxFloat16x4 fColor12; + FfxFloat16x4 fColor22; + FfxFloat16x4 fColor32; + + FfxFloat16x4 fColor03; + FfxFloat16x4 fColor13; + FfxFloat16x4 fColor23; + FfxFloat16x4 fColor33; +}; +#else //FFX_HALF +#define FetchedBicubicSamplesMin16 FetchedBicubicSamples +#endif //FFX_HALF + +FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) +{ + return A + (B - A) * t; +} + +FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} + +// SEE: ../Interpolation/CatmullRom.ipynb, t=0 -> B, t=1 -> C +FfxFloat32x4 CubicCatmullRom(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32x4 C, FfxFloat32x4 D, FfxFloat32 t) +{ + FfxFloat32 t2 = t * t; + FfxFloat32 t3 = t * t * t; + FfxFloat32x4 a = -A / 2.f + (3.f * B) / 2.f - (3.f * C) / 2.f + D / 2.f; + FfxFloat32x4 b = A - (5.f * B) / 2.f + 2.f * C - D / 2.f; + FfxFloat32x4 c = -A / 2.f + C / 2.f; + FfxFloat32x4 d = B; + return a * t3 + b * t2 + c * t + d; +} + +FfxFloat32x4 BicubicCatmullRom(FetchedBicubicSamples BicubicSamples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = CubicCatmullRom(BicubicSamples.fColor00, BicubicSamples.fColor10, BicubicSamples.fColor20, BicubicSamples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = CubicCatmullRom(BicubicSamples.fColor01, BicubicSamples.fColor11, BicubicSamples.fColor21, BicubicSamples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = CubicCatmullRom(BicubicSamples.fColor02, BicubicSamples.fColor12, BicubicSamples.fColor22, BicubicSamples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = CubicCatmullRom(BicubicSamples.fColor03, BicubicSamples.fColor13, BicubicSamples.fColor23, BicubicSamples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = CubicCatmullRom(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + return fColorXY; +} + +FfxFloat32 Lanczos2(FfxFloat32 x) +{ + const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants + return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); +} + +#if FFX_HALF +FfxFloat16 Lanczos2(FfxFloat16 x) +{ + const FFX_MIN16_F PI = FfxFloat16(3.141592653589793f); // TODO: share SDK constants + return abs(x) < FSR2_EPSILON ? FfxFloat16(1.f) : (sin(PI * x) / (PI * x)) * (sin(FfxFloat16(0.5f) * PI * x) / (FfxFloat16(0.5f) * PI * x)); +} +#endif //FFX_HALF + +// FSR1 lanczos approximation. Input is x*x and must be <= 4. +FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) +{ + FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; + FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; + return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); +} + +#if FFX_HALF +FfxFloat16 Lanczos2ApproxSqNoClamp(FfxFloat16 x2) +{ + FfxFloat16 a = FfxFloat16(2.0f / 5.0f) * x2 - FfxFloat16(1); + FfxFloat16 b = FfxFloat16(1.0f / 4.0f) * x2 - FfxFloat16(1); + return (FfxFloat16(25.0f / 16.0f) * a * a - FfxFloat16(25.0f / 16.0f - 1)) * (b * b); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) +{ + x2 = ffxMin(x2, 4.0f); + return Lanczos2ApproxSqNoClamp(x2); +} + +#if FFX_HALF +FfxFloat16 Lanczos2ApproxSq(FfxFloat16 x2) +{ + x2 = ffxMin(x2, FfxFloat16(4.0f)); + return Lanczos2ApproxSqNoClamp(x2); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} + +#if FFX_HALF +FfxFloat16 Lanczos2ApproxNoClamp(FfxFloat16 x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2Approx(FfxFloat32 x) +{ + return Lanczos2ApproxSq(x * x); +} + +#if FFX_HALF +FfxFloat16 Lanczos2Approx(FfxFloat16 x) +{ + return Lanczos2ApproxSq(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) +{ + return SampleLanczos2Weight(abs(x)); +} + +#if FFX_HALF +FfxFloat16 Lanczos2_UseLUT(FfxFloat16 x) +{ + return SampleLanczos2Weight(abs(x)); +} +#endif //FFX_HALF + +#if FFX_FSR2_OPTION_USE_LANCZOS_LUT +FfxFloat32x4 Lanczos2_AllowLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#if FFX_HALF +FfxFloat16x4 Lanczos2_AllowLUT(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t) +{ + FfxFloat16 fWeight0 = Lanczos2_UseLUT(FfxFloat16(-1.f) - t); + FfxFloat16 fWeight1 = Lanczos2_UseLUT(FfxFloat16(-0.f) - t); + FfxFloat16 fWeight2 = Lanczos2_UseLUT(FfxFloat16(+1.f) - t); + FfxFloat16 fWeight3 = Lanczos2_UseLUT(FfxFloat16(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFX_HALF +#else //FFX_FSR2_OPTION_USE_LANCZOS_LUT +#define Lanczos2_AllowLUT Lanczos2 +#endif //FFX_FSR2_OPTION_USE_LANCZOS_LUT + +FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2_AllowLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FfxFloat16x4 Lanczos2(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t) +{ + FfxFloat16 fWeight0 = Lanczos2(FfxFloat16(-1.f) - t); + FfxFloat16 fWeight1 = Lanczos2(FfxFloat16(-0.f) - t); + FfxFloat16 fWeight2 = Lanczos2(FfxFloat16(+1.f) - t); + FfxFloat16 fWeight3 = Lanczos2(FfxFloat16(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat16x4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FfxFloat16x4 fColorX0 = Lanczos2_AllowLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat16x4 fColorX1 = Lanczos2_AllowLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat16x4 fColorX2 = Lanczos2_AllowLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat16x4 fColorX3 = Lanczos2_AllowLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat16x4 fColorXY = Lanczos2_AllowLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat16x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat16x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat16x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +#if FFX_HALF +FfxFloat16x4 Lanczos2Approx(FfxFloat16x4 fColor0, FfxFloat16x4 fColor1, FfxFloat16x4 fColor2, FfxFloat16x4 fColor3, FfxFloat16 t) +{ + FfxFloat16 fWeight0 = Lanczos2ApproxNoClamp(FfxFloat16(-1.f) - t); + FfxFloat16 fWeight1 = Lanczos2ApproxNoClamp(FfxFloat16(-0.f) - t); + FfxFloat16 fWeight2 = Lanczos2ApproxNoClamp(FfxFloat16(+1.f) - t); + FfxFloat16 fWeight3 = Lanczos2ApproxNoClamp(FfxFloat16(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FfxFloat16x4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FfxFloat16x2 fPxFrac) +{ + FfxFloat16x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat16x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat16x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat16x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat16x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat16x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat16x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat16x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. +FfxInt16x2 ClampLoadBicubic(FfxInt16x2 iPxSample, FfxInt16x2 iPxOffset, FfxInt16x2 iTextureSize) +{ + FfxInt16x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x <= 0) ? ffxMax(result.x, FfxInt16(0)) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FfxInt16(1)) : result.x; + result.y = (iPxOffset.y <= 0) ? ffxMax(result.y, FfxInt16(0)) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FfxInt16(1)) : result.y; + return result; +} +#endif //FFX_HALF + +FfxInt32x2 ClampLoadBicubic(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = (iPxOffset.x <= 0) ? ffxMax(result.x, 0) : result.x; + result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; + result.y = (iPxOffset.y <= 0) ? ffxMax(result.y, 0) : result.y; + result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; + return result; +} + +#define DeclareCustomFetchBicubicSamplesWithType(SampleType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, -1), iTextureSize)); \ + Samples.fColor10 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, -1), iTextureSize)); \ + Samples.fColor20 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, -1), iTextureSize)); \ + Samples.fColor30 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, -1), iTextureSize)); \ + \ + Samples.fColor01 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +0), iTextureSize)); \ + Samples.fColor11 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +0), iTextureSize)); \ + Samples.fColor21 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +0), iTextureSize)); \ + Samples.fColor31 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +0), iTextureSize)); \ + \ + Samples.fColor02 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +1), iTextureSize)); \ + Samples.fColor12 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +1), iTextureSize)); \ + Samples.fColor22 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +1), iTextureSize)); \ + Samples.fColor32 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +1), iTextureSize)); \ + \ + Samples.fColor03 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(-1, +2), iTextureSize)); \ + Samples.fColor13 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+0, +2), iTextureSize)); \ + Samples.fColor23 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+1, +2), iTextureSize)); \ + Samples.fColor33 = LoadTexture(ClampLoadBicubic(iPxSample, AddrType(+2, +2), iTextureSize)); \ + \ + return Samples; \ + } + +#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_I2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ + FetchedBilinearSamples Name(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iTextureSize) \ + { \ + FetchedBilinearSamples Samples; \ + Samples.fColor00 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +0), iTextureSize)); \ + Samples.fColor10 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +0), iTextureSize)); \ + Samples.fColor01 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+0, +1), iTextureSize)); \ + Samples.fColor11 = LoadTexture(ClampLoad(iPxSample, FFX_MIN16_I2(+1, +1), iTextureSize)); \ + return Samples; \ + } + +// BE CAREFUL: there is some precision issues and (3253, 125) leading to (3252.9989778, 125.001102) +// is common, so iPxSample can "jitter" +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iPxSample), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fPxFrac))); \ + return fColorXY; \ + } + +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = fUvSample * FfxFloat32x2(iTextureSize) - FFX_BROADCAST_FLOAT32X2(0.5f); \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(FFX_MIN16_I2(iPxSample), FFX_MIN16_I2(iTextureSize)), FFX_MIN16_F2(fPxFrac))); \ + return fColorXY; \ + } + +#endif //!defined( FFX_FSR2_SAMPLE_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_sample.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_sample.h.meta new file mode 100644 index 0000000..25d6b4a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_sample.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 8a15223de39afee47b1c5106dc7fba63 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_fsr2_upsample.h b/Assets/Shaders/FSR2/ffx_fsr2_upsample.h new file mode 100644 index 0000000..0b83d6a --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_upsample.h @@ -0,0 +1,148 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR2_UPSAMPLE_H +#define FFX_FSR2_UPSAMPLE_H + +FfxFloat32 SmoothStep(FfxFloat32 x, FfxFloat32 a, FfxFloat32 b) +{ + x = clamp((x - a) / (b - a), 0.f, 1.f); + return x * x * (3.f - 2.f * x); +} + +FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; + +void DeringingWithMinMax(UPSAMPLE_F3 fDeringingMin, UPSAMPLE_F3 fDeringingMax, FFX_PARAMETER_INOUT UPSAMPLE_F3 fColor, FFX_PARAMETER_OUT FfxFloat32 fRangeSimilarity) +{ + fRangeSimilarity = fDeringingMin.x / fDeringingMax.x; + fColor = clamp(fColor, fDeringingMin, fDeringingMax); +} + +void Deringing(RectificationBoxData clippingBox, FFX_PARAMETER_INOUT UPSAMPLE_F3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} + +UPSAMPLE_F GetUpsampleLanczosWeight(UPSAMPLE_F2 fSrcSampleOffset, UPSAMPLE_F2 fKernelWeight) +{ + UPSAMPLE_F2 fSrcSampleOffsetBiased = UPSAMPLE_F2(fSrcSampleOffset * fKernelWeight); + UPSAMPLE_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); // TODO: check other distances (l0, l1, linf...) + + return fSampleWeight; +} + +UPSAMPLE_F Pow3(UPSAMPLE_F x) +{ + return x * x * x; +} + +UPSAMPLE_F4 ComputeUpsampledColorAndWeight(FFX_MIN16_I2 iPxHrPos, UPSAMPLE_F2 fKernelWeight, FFX_PARAMETER_INOUT RectificationBoxData clippingBox) +{ + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position + FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... + + UPSAMPLE_F3 fSamples[iLanczos2SampleCount]; + + FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FFX_BROADCAST_FLOAT32X2(0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + + UPSAMPLE_I2 offsetTL; + offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? UPSAMPLE_I(-2) : UPSAMPLE_I(-1); + offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? UPSAMPLE_I(-2) : UPSAMPLE_I(-1); + + RectificationBox fRectificationBox; + + //Load samples + // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. + // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. + // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. + const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; + const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + + UPSAMPLE_F2 fOffsetTL = UPSAMPLE_F2(offsetTL); + + FFX_UNROLL + for (FfxInt32 row = 0; row < 4; row++) { + + FFX_UNROLL + for (FfxInt32 col = 0; col < 4; col++) { + FfxInt32 iSampleIndex = col + (row << 2); + + FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + + const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + fSamples[iSampleIndex] = LoadPreparedInputColor(FFX_MIN16_I2(sampleCoord)); + } + } + + RectificationBoxReset(fRectificationBox, fSamples[0]); + + UPSAMPLE_F3 fColor = UPSAMPLE_F3(0.f, 0.f, 0.f); + UPSAMPLE_F fWeight = UPSAMPLE_F(0.f); + UPSAMPLE_F2 fBaseSampleOffset = UPSAMPLE_F2(fSrcUnjitteredPos - fSrcOutputPos); + + FFX_UNROLL + for (FfxUInt32 iSampleIndex = 0; iSampleIndex < iLanczos2SampleCount; ++iSampleIndex) + { + FfxInt32 row = FfxInt32(iSampleIndex >> 2); + FfxInt32 col = FfxInt32(iSampleIndex & 3); + + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const UPSAMPLE_F2 fOffset = fOffsetTL + UPSAMPLE_F2(sampleColRow); + UPSAMPLE_F2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + + UPSAMPLE_F fSampleWeight = UPSAMPLE_F(IsOnScreen(FFX_MIN16_I2(iSrcSamplePos), FFX_MIN16_I2(RenderSize()))) * GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelWeight); + + // Update rectification box + if(all(FFX_LESS_THAN(FfxInt32x2(col, row), FFX_BROADCAST_INT32X2(3)))) + { + //update clipping box in non-locked areas + const UPSAMPLE_F fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + UPSAMPLE_F fBoxSampleWeight = UPSAMPLE_F(1) - ffxSaturate(fSrcSampleOffsetSq / UPSAMPLE_F(3)); + fBoxSampleWeight *= fBoxSampleWeight; + RectificationBoxAddSample(fRectificationBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + + fWeight += fSampleWeight; + fColor += fSampleWeight * fSamples[iSampleIndex]; + } + + // Normalize for deringing (we need to compare colors) + fColor = fColor / (abs(fWeight) > FSR2_EPSILON ? fWeight : UPSAMPLE_F(1.f)); + + RectificationBoxComputeVarianceBoxData(fRectificationBox); + clippingBox = RectificationBoxGetData(fRectificationBox); + + Deringing(RectificationBoxGetData(fRectificationBox), fColor); + + if (any(FFX_LESS_THAN(fKernelWeight, UPSAMPLE_F2_BROADCAST(1.0f)))) { + fWeight = UPSAMPLE_F(averageLanczosWeightPerFrame); + } + + return UPSAMPLE_F4(fColor, ffxMax(UPSAMPLE_F(0), fWeight)); +} + +#endif //!defined( FFX_FSR2_UPSAMPLE_H ) diff --git a/Assets/Shaders/FSR2/ffx_fsr2_upsample.h.meta b/Assets/Shaders/FSR2/ffx_fsr2_upsample.h.meta new file mode 100644 index 0000000..72449d5 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_fsr2_upsample.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: e7aee0ec9f5d5434b8bcf55a633a8104 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/Assets/Shaders/FSR2/ffx_spd.h b/Assets/Shaders/FSR2/ffx_spd.h new file mode 100644 index 0000000..5a27a84 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_spd.h @@ -0,0 +1,936 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifdef FFX_CPU +FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy + FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant + FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant + FfxUInt32x4 rectInfo, // left, top, width, height + FfxInt32 mips) // optional: if -1, calculate based on rect width and height +{ + workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left + workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top + + FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width + FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height + + dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; + dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; + + numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); + + if (mips >= 0) + { + numWorkGroupsAndMips[1] = FfxUInt32(mips); + } + else + { + // calculate based on rect width and height + FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]); + numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); + } +} + +FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy + FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant + FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant + FfxUInt32x4 rectInfo) // left, top, width, height +{ + SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); +} +#endif // #ifdef FFX_CPU + + +//============================================================================================================================== +// NON-PACKED VERSION +//============================================================================================================================== +#ifdef FFX_GPU +#ifdef SPD_PACKED_ONLY +// Avoid compiler error +FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ +} +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); +} +#endif // #ifdef SPD_PACKED_ONLY + +//_____________________________________________________________/\_______________________________________________________________ +#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#extension GL_KHR_shader_subgroup_quad:require +#endif + +void SpdWorkgroupShuffleBarrier() +{ +#ifdef FFX_GLSL + barrier(); +#endif +#ifdef FFX_HLSL + GroupMemoryBarrierWithGroupSync(); +#endif +} + +// Only last active workgroup should proceed +bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice) +{ + // global atomic counter + if (localInvocationIndex == 0) + { + SpdIncreaseAtomicCounter(slice); + } + + SpdWorkgroupShuffleBarrier(); + return (SpdGetAtomicCounter() != (numWorkGroups - 1)); +} + +// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3); +FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) +{ +#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) + + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat32x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4(v0, v1, v2, v3); + +#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) + + // requires SM6.0 + FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1); + FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2); + FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3); + return SpdReduce4(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat32x4 v0 = v; + FfxFloat32x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat32x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat32x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4(v0, v1, v2, v3); + */ +#endif + return v; +} + +FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y); + FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y); + FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y); + FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice); + FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice); + FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice); + FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice); + return SpdReduce4(v0, v1, v2, v3); +} + +FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice) +{ +#ifdef SPD_LINEAR_SAMPLER + return SpdLoadSourceImage(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + v[0] = SpdReduceQuad(v[0]); + v[1] = SpdReduceQuad(v[1]); + v[2] = SpdReduceQuad(v[2]); + v[3] = SpdReduceQuad(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediate(x / 2, y / 2, v[0]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]); + + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ + FfxFloat32x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); + + if (mip <= 1) + return; + + for (FfxUInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediate(x, y, v[i]); + SpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + SpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediate(x + 0, y + 0, v[0]); + SpdStoreIntermediate(x + 8, y + 0, v[1]); + SpdStoreIntermediate(x + 0, y + 8, v[2]); + SpdStoreIntermediate(x + 8, y + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); +#else + SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); +#endif +} + + +void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat32x4 v = SpdLoadIntermediate(x, y); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat32x4 v = + SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... + SpdStoreIntermediate(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... + // 0 x 0 0 0 x 0 0 + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediate(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediate(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... + FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0); + v = SpdReduceQuad(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat32x4 v0 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v3, 6, slice); + + if (mips <= 7) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3); + SpdStore(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediate(x, y, v); +} + +void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); + + SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips <= 6) + return; + + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + SpdResetAtomicCounter(slice); + + // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + SpdDownsampleMips_6_7(x, y, mips, slice); + + SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} + +void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +//============================================================================================================================== +// PACKED VERSION +//============================================================================================================================== + +#if FFX_HALF + +#ifdef FFX_GLSL +#extension GL_EXT_shader_subgroup_extended_types_float16:require +#endif + +FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) +{ +#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v); + FfxFloat16x4 v2 = subgroupQuadSwapVertical(v); + FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v); + return SpdReduce4H(v0, v1, v2, v3); +#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) + // requires SM6.0 + FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1); + FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2); + FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3); + return SpdReduce4H(v0, v1, v2, v3); +/* + // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ + // works for DX11 + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1; + v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1); + FfxFloat16x4 v2; + v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2); + FfxFloat16x4 v3; + v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4); + return SpdReduce4H(v0, v1, v2, v3); + */ +#endif + return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); +} + +FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) +{ + FfxFloat16x4 v0 = SpdLoadIntermediateH(i0.x, i0.y); + FfxFloat16x4 v1 = SpdLoadIntermediateH(i1.x, i1.y); + FfxFloat16x4 v2 = SpdLoadIntermediateH(i2.x, i2.y); + FfxFloat16x4 v3 = SpdLoadIntermediateH(i3.x, i3.y); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice) +{ + return SpdReduceLoad4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +} + +FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) +{ + FfxFloat16x4 v0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice); + FfxFloat16x4 v1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice); + FfxFloat16x4 v2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice); + FfxFloat16x4 v3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice); + return SpdReduce4H(v0, v1, v2, v3); +} + +FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice) +{ +#ifdef SPD_LINEAR_SAMPLER + return SpdLoadSourceImageH(FfxInt32x2(base), slice); +#else + return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); +#endif +} + +void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + v[0] = SpdReduceQuadH(v[0]); + v[1] = SpdReduceQuadH(v[1]); + v[2] = SpdReduceQuadH(v[2]); + v[3] = SpdReduceQuadH(v[3]); + + if ((localInvocationIndex % 4) == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2, v[0]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice); + SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]); + + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice); + SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxFloat16x4 v[4]; + + FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); + FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); + + tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); + pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); + + if (mips <= 1) + return; + + for (FfxInt32 i = 0; i < 4; i++) + { + SpdStoreIntermediateH(x, y, v[i]); + SpdWorkgroupShuffleBarrier(); + if (localInvocationIndex < 64) + { + v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); + } + SpdWorkgroupShuffleBarrier(); + } + + if (localInvocationIndex < 64) + { + SpdStoreIntermediateH(x + 0, y + 0, v[0]); + SpdStoreIntermediateH(x + 8, y + 0, v[1]); + SpdStoreIntermediateH(x + 0, y + 8, v[2]); + SpdStoreIntermediateH(x + 8, y + 8, v[3]); + } +} + +void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); +#else + SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); +#endif +} + + +void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS, try to reduce bank conflicts + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + // ... + // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 + SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v); + } +#else + FfxFloat16x4 v = SpdLoadIntermediateH(x, y); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x + (y / 2) % 2, y, v); + } +#endif +} + +void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 16) + { + // x 0 x 0 + // 0 0 0 0 + // 0 x 0 x + // 0 0 0 0 + FfxFloat16x4 v = + SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 + // ... + // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 + // ... + // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x + // ... + SpdStoreIntermediateH(x * 4 + y, y * 4, v); + } +#else + if (localInvocationIndex < 64) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v); + } + } +#endif +} + +void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 4) + { + // x 0 0 0 x 0 0 0 + // ... + // 0 x 0 0 0 x 0 0 + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0), + FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), + FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4)); + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice); + // store to LDS + // x x x x 0 ... + // 0 ... + SpdStoreIntermediateH(x + y * 2, 0, v); + } +#else + if (localInvocationIndex < 16) + { + FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice); + SpdStoreIntermediateH(x / 2 + y, 0, v); + } + } +#endif +} + +void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) +{ +#ifdef SPD_NO_WAVE_OPERATIONS + if (localInvocationIndex < 1) + { + // x x x x 0 ... + // 0 ... + FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0)); + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } +#else + if (localInvocationIndex < 4) + { + FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0); + v = SpdReduceQuadH(v); + // quad index 0 stores result + if (localInvocationIndex % 4 == 0) + { + SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice); + } + } +#endif +} + +void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice) +{ + FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); + FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); + FfxFloat16x4 v0 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v0, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 0); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 0); + FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v1, 6, slice); + + tex = FfxInt32x2(x * 4 + 0, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 0, y * 2 + 1); + FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v2, 6, slice); + + tex = FfxInt32x2(x * 4 + 2, y * 4 + 2); + pix = FfxInt32x2(x * 2 + 1, y * 2 + 1); + FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v3, 6, slice); + + if (mips < 8) + return; + // no barrier needed, working on values only from the same thread + + FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3); + SpdStoreH(FfxInt32x2(x, y), v, 7, slice); + SpdStoreIntermediateH(x, y, v); +} + +void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice) +{ + if (mips <= baseMip) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); + + if (mips <= baseMip + 1) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); + + if (mips <= baseMip + 2) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); + + if (mips <= baseMip + 3) + return; + SpdWorkgroupShuffleBarrier(); + SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); +} + +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) +{ + FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); + FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); + FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + + SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); + + SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); + + if (mips < 7) + return; + + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) + return; + + SpdResetAtomicCounter(slice); + + // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + SpdDownsampleMips_6_7H(x, y, mips, slice); + + SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); +} + +void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) +{ + SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); +} + +#endif // #if FFX_HALF +#endif // #ifdef FFX_GPU diff --git a/Assets/Shaders/FSR2/ffx_spd.h.meta b/Assets/Shaders/FSR2/ffx_spd.h.meta new file mode 100644 index 0000000..1f9d163 --- /dev/null +++ b/Assets/Shaders/FSR2/ffx_spd.h.meta @@ -0,0 +1,27 @@ +fileFormatVersion: 2 +guid: 37829421a92b3754c97640dfa9babbc0 +PluginImporter: + externalObjects: {} + serializedVersion: 2 + iconMap: {} + executionOrder: {} + defineConstraints: [] + isPreloaded: 0 + isOverridable: 0 + isExplicitlyReferenced: 0 + validateReferences: 1 + platformData: + - first: + Any: + second: + enabled: 1 + settings: {} + - first: + Editor: Editor + second: + enabled: 0 + settings: + DefaultValueInitialized: true + userData: + assetBundleName: + assetBundleVariant: diff --git a/FSR2Test.sln b/FSR2Test.sln new file mode 100644 index 0000000..c569899 --- /dev/null +++ b/FSR2Test.sln @@ -0,0 +1,17 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Assembly-CSharp", "Assembly-CSharp.csproj", "{ff4efef8-0523-63da-ea8a-38e4fad76a23}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {ff4efef8-0523-63da-ea8a-38e4fad76a23}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ff4efef8-0523-63da-ea8a-38e4fad76a23}.Debug|Any CPU.Build.0 = Debug|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Packages/manifest.json b/Packages/manifest.json new file mode 100644 index 0000000..7490187 --- /dev/null +++ b/Packages/manifest.json @@ -0,0 +1,45 @@ +{ + "dependencies": { + "com.unity.collab-proxy": "1.15.18", + "com.unity.feature.development": "1.0.1", + "com.unity.ide.rider": "3.0.14", + "com.unity.ide.visualstudio": "2.0.15", + "com.unity.ide.vscode": "1.2.5", + "com.unity.test-framework": "1.1.31", + "com.unity.textmeshpro": "3.0.6", + "com.unity.timeline": "1.6.4", + "com.unity.ugui": "1.0.0", + "com.unity.visualscripting": "1.7.8", + "com.unity.modules.ai": "1.0.0", + "com.unity.modules.androidjni": "1.0.0", + "com.unity.modules.animation": "1.0.0", + "com.unity.modules.assetbundle": "1.0.0", + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.cloth": "1.0.0", + "com.unity.modules.director": "1.0.0", + "com.unity.modules.imageconversion": "1.0.0", + "com.unity.modules.imgui": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0", + "com.unity.modules.particlesystem": "1.0.0", + "com.unity.modules.physics": "1.0.0", + "com.unity.modules.physics2d": "1.0.0", + "com.unity.modules.screencapture": "1.0.0", + "com.unity.modules.terrain": "1.0.0", + "com.unity.modules.terrainphysics": "1.0.0", + "com.unity.modules.tilemap": "1.0.0", + "com.unity.modules.ui": "1.0.0", + "com.unity.modules.uielements": "1.0.0", + "com.unity.modules.umbra": "1.0.0", + "com.unity.modules.unityanalytics": "1.0.0", + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.modules.unitywebrequestassetbundle": "1.0.0", + "com.unity.modules.unitywebrequestaudio": "1.0.0", + "com.unity.modules.unitywebrequesttexture": "1.0.0", + "com.unity.modules.unitywebrequestwww": "1.0.0", + "com.unity.modules.vehicles": "1.0.0", + "com.unity.modules.video": "1.0.0", + "com.unity.modules.vr": "1.0.0", + "com.unity.modules.wind": "1.0.0", + "com.unity.modules.xr": "1.0.0" + } +} diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json new file mode 100644 index 0000000..3602e16 --- /dev/null +++ b/Packages/packages-lock.json @@ -0,0 +1,413 @@ +{ + "dependencies": { + "com.unity.collab-proxy": { + "version": "1.15.18", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.services.core": "1.0.1" + }, + "url": "https://packages.unity.com" + }, + "com.unity.editorcoroutines": { + "version": "1.0.0", + "depth": 1, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.ext.nunit": { + "version": "1.0.6", + "depth": 1, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.feature.development": { + "version": "1.0.1", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.ide.visualstudio": "2.0.15", + "com.unity.ide.rider": "3.0.14", + "com.unity.ide.vscode": "1.2.5", + "com.unity.editorcoroutines": "1.0.0", + "com.unity.performance.profile-analyzer": "1.1.1", + "com.unity.test-framework": "1.1.31", + "com.unity.testtools.codecoverage": "1.0.1" + } + }, + "com.unity.ide.rider": { + "version": "3.0.14", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.ext.nunit": "1.0.6" + }, + "url": "https://packages.unity.com" + }, + "com.unity.ide.visualstudio": { + "version": "2.0.15", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.test-framework": "1.1.9" + }, + "url": "https://packages.unity.com" + }, + "com.unity.ide.vscode": { + "version": "1.2.5", + "depth": 0, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.nuget.newtonsoft-json": { + "version": "3.0.2", + "depth": 2, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.performance.profile-analyzer": { + "version": "1.1.1", + "depth": 1, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.services.core": { + "version": "1.4.0", + "depth": 1, + "source": "registry", + "dependencies": { + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.nuget.newtonsoft-json": "3.0.2", + "com.unity.modules.androidjni": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.settings-manager": { + "version": "1.0.3", + "depth": 2, + "source": "registry", + "dependencies": {}, + "url": "https://packages.unity.com" + }, + "com.unity.test-framework": { + "version": "1.1.31", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.ext.nunit": "1.0.6", + "com.unity.modules.imgui": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.testtools.codecoverage": { + "version": "1.0.1", + "depth": 1, + "source": "registry", + "dependencies": { + "com.unity.test-framework": "1.0.16", + "com.unity.settings-manager": "1.0.1" + }, + "url": "https://packages.unity.com" + }, + "com.unity.textmeshpro": { + "version": "3.0.6", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.ugui": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.timeline": { + "version": "1.6.4", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.modules.director": "1.0.0", + "com.unity.modules.animation": "1.0.0", + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.particlesystem": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.ugui": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.ui": "1.0.0", + "com.unity.modules.imgui": "1.0.0" + } + }, + "com.unity.visualscripting": { + "version": "1.7.8", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.ugui": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.modules.ai": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.androidjni": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.animation": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.assetbundle": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.audio": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.cloth": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.physics": "1.0.0" + } + }, + "com.unity.modules.director": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.animation": "1.0.0" + } + }, + "com.unity.modules.imageconversion": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.imgui": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.jsonserialize": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.particlesystem": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.physics": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.physics2d": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.screencapture": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.imageconversion": "1.0.0" + } + }, + "com.unity.modules.subsystems": { + "version": "1.0.0", + "depth": 1, + "source": "builtin", + "dependencies": { + "com.unity.modules.jsonserialize": "1.0.0" + } + }, + "com.unity.modules.terrain": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.terrainphysics": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.physics": "1.0.0", + "com.unity.modules.terrain": "1.0.0" + } + }, + "com.unity.modules.tilemap": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.physics2d": "1.0.0" + } + }, + "com.unity.modules.ui": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.uielements": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.ui": "1.0.0", + "com.unity.modules.imgui": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0", + "com.unity.modules.uielementsnative": "1.0.0" + } + }, + "com.unity.modules.uielementsnative": { + "version": "1.0.0", + "depth": 1, + "source": "builtin", + "dependencies": { + "com.unity.modules.ui": "1.0.0", + "com.unity.modules.imgui": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0" + } + }, + "com.unity.modules.umbra": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.unityanalytics": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0" + } + }, + "com.unity.modules.unitywebrequest": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.unitywebrequestassetbundle": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.assetbundle": "1.0.0", + "com.unity.modules.unitywebrequest": "1.0.0" + } + }, + "com.unity.modules.unitywebrequestaudio": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.modules.audio": "1.0.0" + } + }, + "com.unity.modules.unitywebrequesttexture": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.modules.imageconversion": "1.0.0" + } + }, + "com.unity.modules.unitywebrequestwww": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.unitywebrequest": "1.0.0", + "com.unity.modules.unitywebrequestassetbundle": "1.0.0", + "com.unity.modules.unitywebrequestaudio": "1.0.0", + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.assetbundle": "1.0.0", + "com.unity.modules.imageconversion": "1.0.0" + } + }, + "com.unity.modules.vehicles": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.physics": "1.0.0" + } + }, + "com.unity.modules.video": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.ui": "1.0.0", + "com.unity.modules.unitywebrequest": "1.0.0" + } + }, + "com.unity.modules.vr": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.jsonserialize": "1.0.0", + "com.unity.modules.physics": "1.0.0", + "com.unity.modules.xr": "1.0.0" + } + }, + "com.unity.modules.wind": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.xr": { + "version": "1.0.0", + "depth": 0, + "source": "builtin", + "dependencies": { + "com.unity.modules.physics": "1.0.0", + "com.unity.modules.jsonserialize": "1.0.0", + "com.unity.modules.subsystems": "1.0.0" + } + } + } +} diff --git a/ProjectSettings/AudioManager.asset b/ProjectSettings/AudioManager.asset new file mode 100644 index 0000000..07ebfb0 --- /dev/null +++ b/ProjectSettings/AudioManager.asset @@ -0,0 +1,19 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!11 &1 +AudioManager: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_Volume: 1 + Rolloff Scale: 1 + Doppler Factor: 1 + Default Speaker Mode: 2 + m_SampleRate: 0 + m_DSPBufferSize: 1024 + m_VirtualVoiceCount: 512 + m_RealVoiceCount: 32 + m_SpatializerPlugin: + m_AmbisonicDecoderPlugin: + m_DisableAudio: 0 + m_VirtualizeEffects: 1 + m_RequestedDSPBufferSize: 1024 diff --git a/ProjectSettings/ClusterInputManager.asset b/ProjectSettings/ClusterInputManager.asset new file mode 100644 index 0000000..e7886b2 --- /dev/null +++ b/ProjectSettings/ClusterInputManager.asset @@ -0,0 +1,6 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!236 &1 +ClusterInputManager: + m_ObjectHideFlags: 0 + m_Inputs: [] diff --git a/ProjectSettings/DynamicsManager.asset b/ProjectSettings/DynamicsManager.asset new file mode 100644 index 0000000..cdc1f3e --- /dev/null +++ b/ProjectSettings/DynamicsManager.asset @@ -0,0 +1,34 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!55 &1 +PhysicsManager: + m_ObjectHideFlags: 0 + serializedVersion: 11 + m_Gravity: {x: 0, y: -9.81, z: 0} + m_DefaultMaterial: {fileID: 0} + m_BounceThreshold: 2 + m_SleepThreshold: 0.005 + m_DefaultContactOffset: 0.01 + m_DefaultSolverIterations: 6 + m_DefaultSolverVelocityIterations: 1 + m_QueriesHitBackfaces: 0 + m_QueriesHitTriggers: 1 + m_EnableAdaptiveForce: 0 + m_ClothInterCollisionDistance: 0 + m_ClothInterCollisionStiffness: 0 + m_ContactsGeneration: 1 + m_LayerCollisionMatrix: ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + m_AutoSimulation: 1 + m_AutoSyncTransforms: 0 + m_ReuseCollisionCallbacks: 1 + m_ClothInterCollisionSettingsToggle: 0 + m_ContactPairsMode: 0 + m_BroadphaseType: 0 + m_WorldBounds: + m_Center: {x: 0, y: 0, z: 0} + m_Extent: {x: 250, y: 250, z: 250} + m_WorldSubdivisions: 8 + m_FrictionType: 0 + m_EnableEnhancedDeterminism: 0 + m_EnableUnifiedHeightmaps: 1 + m_DefaultMaxAngluarSpeed: 7 diff --git a/ProjectSettings/EditorBuildSettings.asset b/ProjectSettings/EditorBuildSettings.asset new file mode 100644 index 0000000..0147887 --- /dev/null +++ b/ProjectSettings/EditorBuildSettings.asset @@ -0,0 +1,8 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!1045 &1 +EditorBuildSettings: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_Scenes: [] + m_configObjects: {} diff --git a/ProjectSettings/EditorSettings.asset b/ProjectSettings/EditorSettings.asset new file mode 100644 index 0000000..1e44a0a --- /dev/null +++ b/ProjectSettings/EditorSettings.asset @@ -0,0 +1,30 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!159 &1 +EditorSettings: + m_ObjectHideFlags: 0 + serializedVersion: 11 + m_ExternalVersionControlSupport: Visible Meta Files + m_SerializationMode: 2 + m_LineEndingsForNewScripts: 0 + m_DefaultBehaviorMode: 0 + m_PrefabRegularEnvironment: {fileID: 0} + m_PrefabUIEnvironment: {fileID: 0} + m_SpritePackerMode: 0 + m_SpritePackerPaddingPower: 1 + m_EtcTextureCompressorBehavior: 1 + m_EtcTextureFastCompressor: 1 + m_EtcTextureNormalCompressor: 2 + m_EtcTextureBestCompressor: 4 + m_ProjectGenerationIncludedExtensions: txt;xml;fnt;cd;asmdef;rsp;asmref + m_ProjectGenerationRootNamespace: + m_CollabEditorSettings: + inProgressEnabled: 1 + m_EnableTextureStreamingInEditMode: 1 + m_EnableTextureStreamingInPlayMode: 1 + m_AsyncShaderCompilation: 1 + m_EnterPlayModeOptionsEnabled: 0 + m_EnterPlayModeOptions: 3 + m_ShowLightmapResolutionOverlay: 1 + m_UseLegacyProbeSampleCount: 0 + m_SerializeInlineMappingsOnOneLine: 1 diff --git a/ProjectSettings/GameCoreScarlettSettings.asset b/ProjectSettings/GameCoreScarlettSettings.asset new file mode 100644 index 0000000..5728631 --- /dev/null +++ b/ProjectSettings/GameCoreScarlettSettings.asset @@ -0,0 +1,11 @@ +{ + "buildSubtarget": 0, + "deploymentMethod": 0, + "deploymentDrive": 0, + "scid": "00000000-0000-0000-0000-000000000000", + "packageEncryption": 0, + "xboxSeriesSResolutionWidth": 2560, + "xboxSeriesSResolutionHeight": 1440, + "xboxSeriesXResolutionWidth": 3840, + "xboxSeriesXResolutionHeight": 2160 +} \ No newline at end of file diff --git a/ProjectSettings/GameCoreXboxOneSettings.asset b/ProjectSettings/GameCoreXboxOneSettings.asset new file mode 100644 index 0000000..0d3d482 --- /dev/null +++ b/ProjectSettings/GameCoreXboxOneSettings.asset @@ -0,0 +1,14 @@ +{ + "buildSubtarget": 0, + "deploymentMethod": 0, + "deploymentDrive": 0, + "scid": "00000000-0000-0000-0000-000000000000", + "packageEncryption": 0, + "xboxOneResolutionWidth": 1920, + "xboxOneResolutionHeight": 1080, + "xboxOneSResolutionWidth": 1920, + "xboxOneSResolutionHeight": 1080, + "xboxOneXResolutionWidth": 3840, + "xboxOneXResolutionHeight": 2160, + "esramDisabled": false +} \ No newline at end of file diff --git a/ProjectSettings/GraphicsSettings.asset b/ProjectSettings/GraphicsSettings.asset new file mode 100644 index 0000000..43369e3 --- /dev/null +++ b/ProjectSettings/GraphicsSettings.asset @@ -0,0 +1,63 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!30 &1 +GraphicsSettings: + m_ObjectHideFlags: 0 + serializedVersion: 13 + m_Deferred: + m_Mode: 1 + m_Shader: {fileID: 69, guid: 0000000000000000f000000000000000, type: 0} + m_DeferredReflections: + m_Mode: 1 + m_Shader: {fileID: 74, guid: 0000000000000000f000000000000000, type: 0} + m_ScreenSpaceShadows: + m_Mode: 1 + m_Shader: {fileID: 64, guid: 0000000000000000f000000000000000, type: 0} + m_LegacyDeferred: + m_Mode: 1 + m_Shader: {fileID: 63, guid: 0000000000000000f000000000000000, type: 0} + m_DepthNormals: + m_Mode: 1 + m_Shader: {fileID: 62, guid: 0000000000000000f000000000000000, type: 0} + m_MotionVectors: + m_Mode: 1 + m_Shader: {fileID: 75, guid: 0000000000000000f000000000000000, type: 0} + m_LightHalo: + m_Mode: 1 + m_Shader: {fileID: 105, guid: 0000000000000000f000000000000000, type: 0} + m_LensFlare: + m_Mode: 1 + m_Shader: {fileID: 102, guid: 0000000000000000f000000000000000, type: 0} + m_AlwaysIncludedShaders: + - {fileID: 7, guid: 0000000000000000f000000000000000, type: 0} + - {fileID: 15104, guid: 0000000000000000f000000000000000, type: 0} + - {fileID: 15105, guid: 0000000000000000f000000000000000, type: 0} + - {fileID: 15106, guid: 0000000000000000f000000000000000, type: 0} + - {fileID: 10753, guid: 0000000000000000f000000000000000, type: 0} + - {fileID: 10770, guid: 0000000000000000f000000000000000, type: 0} + m_PreloadedShaders: [] + m_SpritesDefaultMaterial: {fileID: 10754, guid: 0000000000000000f000000000000000, + type: 0} + m_CustomRenderPipeline: {fileID: 0} + m_TransparencySortMode: 0 + m_TransparencySortAxis: {x: 0, y: 0, z: 1} + m_DefaultRenderingPath: 1 + m_DefaultMobileRenderingPath: 1 + m_TierSettings: [] + m_LightmapStripping: 0 + m_FogStripping: 0 + m_InstancingStripping: 0 + m_LightmapKeepPlain: 1 + m_LightmapKeepDirCombined: 1 + m_LightmapKeepDynamicPlain: 1 + m_LightmapKeepDynamicDirCombined: 1 + m_LightmapKeepShadowMask: 1 + m_LightmapKeepSubtractive: 1 + m_FogKeepLinear: 1 + m_FogKeepExp: 1 + m_FogKeepExp2: 1 + m_AlbedoSwatchInfos: [] + m_LightsUseLinearIntensity: 0 + m_LightsUseColorTemperature: 0 + m_LogWhenShaderIsCompiled: 0 + m_AllowEnlightenSupportForUpgradedProject: 0 diff --git a/ProjectSettings/InputManager.asset b/ProjectSettings/InputManager.asset new file mode 100644 index 0000000..17c8f53 --- /dev/null +++ b/ProjectSettings/InputManager.asset @@ -0,0 +1,295 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!13 &1 +InputManager: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_Axes: + - serializedVersion: 3 + m_Name: Horizontal + descriptiveName: + descriptiveNegativeName: + negativeButton: left + positiveButton: right + altNegativeButton: a + altPositiveButton: d + gravity: 3 + dead: 0.001 + sensitivity: 3 + snap: 1 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Vertical + descriptiveName: + descriptiveNegativeName: + negativeButton: down + positiveButton: up + altNegativeButton: s + altPositiveButton: w + gravity: 3 + dead: 0.001 + sensitivity: 3 + snap: 1 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire1 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: left ctrl + altNegativeButton: + altPositiveButton: mouse 0 + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire2 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: left alt + altNegativeButton: + altPositiveButton: mouse 1 + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire3 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: left shift + altNegativeButton: + altPositiveButton: mouse 2 + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Jump + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: space + altNegativeButton: + altPositiveButton: + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Mouse X + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: + altNegativeButton: + altPositiveButton: + gravity: 0 + dead: 0 + sensitivity: 0.1 + snap: 0 + invert: 0 + type: 1 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Mouse Y + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: + altNegativeButton: + altPositiveButton: + gravity: 0 + dead: 0 + sensitivity: 0.1 + snap: 0 + invert: 0 + type: 1 + axis: 1 + joyNum: 0 + - serializedVersion: 3 + m_Name: Mouse ScrollWheel + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: + altNegativeButton: + altPositiveButton: + gravity: 0 + dead: 0 + sensitivity: 0.1 + snap: 0 + invert: 0 + type: 1 + axis: 2 + joyNum: 0 + - serializedVersion: 3 + m_Name: Horizontal + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: + altNegativeButton: + altPositiveButton: + gravity: 0 + dead: 0.19 + sensitivity: 1 + snap: 0 + invert: 0 + type: 2 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Vertical + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: + altNegativeButton: + altPositiveButton: + gravity: 0 + dead: 0.19 + sensitivity: 1 + snap: 0 + invert: 1 + type: 2 + axis: 1 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire1 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: joystick button 0 + altNegativeButton: + altPositiveButton: + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire2 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: joystick button 1 + altNegativeButton: + altPositiveButton: + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Fire3 + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: joystick button 2 + altNegativeButton: + altPositiveButton: + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Jump + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: joystick button 3 + altNegativeButton: + altPositiveButton: + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Submit + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: return + altNegativeButton: + altPositiveButton: joystick button 0 + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Submit + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: enter + altNegativeButton: + altPositiveButton: space + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 + - serializedVersion: 3 + m_Name: Cancel + descriptiveName: + descriptiveNegativeName: + negativeButton: + positiveButton: escape + altNegativeButton: + altPositiveButton: joystick button 1 + gravity: 1000 + dead: 0.001 + sensitivity: 1000 + snap: 0 + invert: 0 + type: 0 + axis: 0 + joyNum: 0 diff --git a/ProjectSettings/MemorySettings.asset b/ProjectSettings/MemorySettings.asset new file mode 100644 index 0000000..5b5face --- /dev/null +++ b/ProjectSettings/MemorySettings.asset @@ -0,0 +1,35 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!387306366 &1 +MemorySettings: + m_ObjectHideFlags: 0 + m_EditorMemorySettings: + m_MainAllocatorBlockSize: -1 + m_ThreadAllocatorBlockSize: -1 + m_MainGfxBlockSize: -1 + m_ThreadGfxBlockSize: -1 + m_CacheBlockSize: -1 + m_TypetreeBlockSize: -1 + m_ProfilerBlockSize: -1 + m_ProfilerEditorBlockSize: -1 + m_BucketAllocatorGranularity: -1 + m_BucketAllocatorBucketsCount: -1 + m_BucketAllocatorBlockSize: -1 + m_BucketAllocatorBlockCount: -1 + m_ProfilerBucketAllocatorGranularity: -1 + m_ProfilerBucketAllocatorBucketsCount: -1 + m_ProfilerBucketAllocatorBlockSize: -1 + m_ProfilerBucketAllocatorBlockCount: -1 + m_TempAllocatorSizeMain: -1 + m_JobTempAllocatorBlockSize: -1 + m_BackgroundJobTempAllocatorBlockSize: -1 + m_JobTempAllocatorReducedBlockSize: -1 + m_TempAllocatorSizeGIBakingWorker: -1 + m_TempAllocatorSizeNavMeshWorker: -1 + m_TempAllocatorSizeAudioWorker: -1 + m_TempAllocatorSizeCloudWorker: -1 + m_TempAllocatorSizeGfx: -1 + m_TempAllocatorSizeJobWorker: -1 + m_TempAllocatorSizeBackgroundWorker: -1 + m_TempAllocatorSizePreloadManager: -1 + m_PlatformMemorySettings: {} diff --git a/ProjectSettings/NavMeshAreas.asset b/ProjectSettings/NavMeshAreas.asset new file mode 100644 index 0000000..3b0b7c3 --- /dev/null +++ b/ProjectSettings/NavMeshAreas.asset @@ -0,0 +1,91 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!126 &1 +NavMeshProjectSettings: + m_ObjectHideFlags: 0 + serializedVersion: 2 + areas: + - name: Walkable + cost: 1 + - name: Not Walkable + cost: 1 + - name: Jump + cost: 2 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + - name: + cost: 1 + m_LastAgentTypeID: -887442657 + m_Settings: + - serializedVersion: 2 + agentTypeID: 0 + agentRadius: 0.5 + agentHeight: 2 + agentSlope: 45 + agentClimb: 0.75 + ledgeDropHeight: 0 + maxJumpAcrossDistance: 0 + minRegionArea: 2 + manualCellSize: 0 + cellSize: 0.16666667 + manualTileSize: 0 + tileSize: 256 + accuratePlacement: 0 + debug: + m_Flags: 0 + m_SettingNames: + - Humanoid diff --git a/ProjectSettings/PS5Settings.json b/ProjectSettings/PS5Settings.json new file mode 100644 index 0000000..31e54b6 --- /dev/null +++ b/ProjectSettings/PS5Settings.json @@ -0,0 +1,56 @@ +{ + "MonoBehaviour": { + "m_Enabled": true, + "m_EditorHideFlags": 0, + "m_Name": "", + "m_EditorClassIdentifier": "UnityEditor.PS5.Extensions:UnityEditor.PS5:PS5Settings", + "npConfigZipPath": "", + "monoEnv": "", + "scriptOptimizationLevel": 2, + "enableApplicationExit": false, + "resetTempFolder": true, + "disableAutoHideSplash": false, + "playerPrefsSupport": false, + "appType": 0, + "restrictedAudioUsageRights": false, + "backgroundImagePath": "", + "startupBackgroundImagePath": "", + "startupForegroundImagePath": "", + "startupImagesFolder": "", + "iconImagesFolder": "", + "bgmPath": "", + "playerPrefsMaxSize": 32768, + "videoOutInitialWidth": 1920, + "useResolutionFallback": false, + "videoOutPixelFormat": 0, + "videoOutOutputMode": 1, + "paramFilePath": "", + "passcode": "vY_2a7KGxaA8HPuZY4oGcMfY_1WFzmys", + "updateReferencePackage": "", + "includedModules": [ + "libc.prx", + "libSceFace.prx", + "libSceFaceTracker.prx", + "libSceJobManager.prx", + "libSceJobManager_nosubmission.prx", + "libSceNpCppWebApi.prx", + "libScePfs.prx" + ], + "sharedBinaryContentLabels": [], + "sharedBinarySystemFolders": [], + "workspaceName": "workspace0", + "buildCompressionType": 0, + "buildCompressionLevel": 0, + "keepPackageFiles": false, + "buildSubtarget": 0, + "sdkOverride": "", + "saveDataImagePath": "", + "configFileParsed": false, + "operatingSystemCanDisableSplashScreen": false, + "supportsVR": false, + "requiresVR": false, + "vrrSupport": 0, + "psvr2PlayAreaSupport": 0, + "psvr2DetailedGazeTrackingStatusEnabled": false + } +} \ No newline at end of file diff --git a/ProjectSettings/PackageManagerSettings.asset b/ProjectSettings/PackageManagerSettings.asset new file mode 100644 index 0000000..112a053 --- /dev/null +++ b/ProjectSettings/PackageManagerSettings.asset @@ -0,0 +1,35 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!114 &1 +MonoBehaviour: + m_ObjectHideFlags: 61 + m_CorrespondingSourceObject: {fileID: 0} + m_PrefabInstance: {fileID: 0} + m_PrefabAsset: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 13964, guid: 0000000000000000e000000000000000, type: 0} + m_Name: + m_EditorClassIdentifier: + m_EnablePreReleasePackages: 0 + m_EnablePackageDependencies: 0 + m_AdvancedSettingsExpanded: 1 + m_ScopedRegistriesSettingsExpanded: 1 + m_SeeAllPackageVersions: 0 + oneTimeWarningShown: 0 + m_Registries: + - m_Id: main + m_Name: + m_Url: https://packages.unity.com + m_Scopes: [] + m_IsDefault: 1 + m_Capabilities: 7 + m_UserSelectedRegistryName: + m_UserAddingNewScopedRegistry: 0 + m_RegistryInfoDraft: + m_Modified: 0 + m_ErrorMessage: + m_UserModificationsInstanceId: -830 + m_OriginalInstanceId: -832 + m_LoadAssets: 0 diff --git a/ProjectSettings/Packages/com.unity.testtools.codecoverage/Settings.json b/ProjectSettings/Packages/com.unity.testtools.codecoverage/Settings.json new file mode 100644 index 0000000..ad11087 --- /dev/null +++ b/ProjectSettings/Packages/com.unity.testtools.codecoverage/Settings.json @@ -0,0 +1,7 @@ +{ + "m_Name": "Settings", + "m_Path": "ProjectSettings/Packages/com.unity.testtools.codecoverage/Settings.json", + "m_Dictionary": { + "m_DictionaryValues": [] + } +} \ No newline at end of file diff --git a/ProjectSettings/Physics2DSettings.asset b/ProjectSettings/Physics2DSettings.asset new file mode 100644 index 0000000..47880b1 --- /dev/null +++ b/ProjectSettings/Physics2DSettings.asset @@ -0,0 +1,56 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!19 &1 +Physics2DSettings: + m_ObjectHideFlags: 0 + serializedVersion: 4 + m_Gravity: {x: 0, y: -9.81} + m_DefaultMaterial: {fileID: 0} + m_VelocityIterations: 8 + m_PositionIterations: 3 + m_VelocityThreshold: 1 + m_MaxLinearCorrection: 0.2 + m_MaxAngularCorrection: 8 + m_MaxTranslationSpeed: 100 + m_MaxRotationSpeed: 360 + m_BaumgarteScale: 0.2 + m_BaumgarteTimeOfImpactScale: 0.75 + m_TimeToSleep: 0.5 + m_LinearSleepTolerance: 0.01 + m_AngularSleepTolerance: 2 + m_DefaultContactOffset: 0.01 + m_JobOptions: + serializedVersion: 2 + useMultithreading: 0 + useConsistencySorting: 0 + m_InterpolationPosesPerJob: 100 + m_NewContactsPerJob: 30 + m_CollideContactsPerJob: 100 + m_ClearFlagsPerJob: 200 + m_ClearBodyForcesPerJob: 200 + m_SyncDiscreteFixturesPerJob: 50 + m_SyncContinuousFixturesPerJob: 50 + m_FindNearestContactsPerJob: 100 + m_UpdateTriggerContactsPerJob: 100 + m_IslandSolverCostThreshold: 100 + m_IslandSolverBodyCostScale: 1 + m_IslandSolverContactCostScale: 10 + m_IslandSolverJointCostScale: 10 + m_IslandSolverBodiesPerJob: 50 + m_IslandSolverContactsPerJob: 50 + m_AutoSimulation: 1 + m_QueriesHitTriggers: 1 + m_QueriesStartInColliders: 1 + m_CallbacksOnDisable: 1 + m_ReuseCollisionCallbacks: 1 + m_AutoSyncTransforms: 0 + m_AlwaysShowColliders: 0 + m_ShowColliderSleep: 1 + m_ShowColliderContacts: 0 + m_ShowColliderAABB: 0 + m_ContactArrowScale: 0.2 + m_ColliderAwakeColor: {r: 0.5686275, g: 0.95686275, b: 0.54509807, a: 0.7529412} + m_ColliderAsleepColor: {r: 0.5686275, g: 0.95686275, b: 0.54509807, a: 0.36078432} + m_ColliderContactColor: {r: 1, g: 0, b: 1, a: 0.6862745} + m_ColliderAABBColor: {r: 1, g: 1, b: 0, a: 0.2509804} + m_LayerCollisionMatrix: ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff diff --git a/ProjectSettings/PresetManager.asset b/ProjectSettings/PresetManager.asset new file mode 100644 index 0000000..67a94da --- /dev/null +++ b/ProjectSettings/PresetManager.asset @@ -0,0 +1,7 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!1386491679 &1 +PresetManager: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_DefaultPresets: {} diff --git a/ProjectSettings/ProjectSettings.asset b/ProjectSettings/ProjectSettings.asset new file mode 100644 index 0000000..41c1783 --- /dev/null +++ b/ProjectSettings/ProjectSettings.asset @@ -0,0 +1,723 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!129 &1 +PlayerSettings: + m_ObjectHideFlags: 0 + serializedVersion: 23 + productGUID: 20a79ce4e9cd3e842ac458d837c5a65c + AndroidProfiler: 0 + AndroidFilterTouchesWhenObscured: 0 + AndroidEnableSustainedPerformanceMode: 0 + defaultScreenOrientation: 4 + targetDevice: 2 + useOnDemandResources: 0 + accelerometerFrequency: 60 + companyName: DefaultCompany + productName: FSR2Test + defaultCursor: {fileID: 0} + cursorHotspot: {x: 0, y: 0} + m_SplashScreenBackgroundColor: {r: 0.13725491, g: 0.12156863, b: 0.1254902, a: 1} + m_ShowUnitySplashScreen: 1 + m_ShowUnitySplashLogo: 1 + m_SplashScreenOverlayOpacity: 1 + m_SplashScreenAnimation: 1 + m_SplashScreenLogoStyle: 1 + m_SplashScreenDrawMode: 0 + m_SplashScreenBackgroundAnimationZoom: 1 + m_SplashScreenLogoAnimationZoom: 1 + m_SplashScreenBackgroundLandscapeAspect: 1 + m_SplashScreenBackgroundPortraitAspect: 1 + m_SplashScreenBackgroundLandscapeUvs: + serializedVersion: 2 + x: 0 + y: 0 + width: 1 + height: 1 + m_SplashScreenBackgroundPortraitUvs: + serializedVersion: 2 + x: 0 + y: 0 + width: 1 + height: 1 + m_SplashScreenLogos: [] + m_VirtualRealitySplashScreen: {fileID: 0} + m_HolographicTrackingLossScreen: {fileID: 0} + defaultScreenWidth: 1920 + defaultScreenHeight: 1080 + defaultScreenWidthWeb: 960 + defaultScreenHeightWeb: 600 + m_StereoRenderingPath: 0 + m_ActiveColorSpace: 0 + m_MTRendering: 1 + mipStripping: 0 + numberOfMipsStripped: 0 + m_StackTraceTypes: 010000000100000001000000010000000100000001000000 + iosShowActivityIndicatorOnLoading: -1 + androidShowActivityIndicatorOnLoading: -1 + iosUseCustomAppBackgroundBehavior: 0 + iosAllowHTTPDownload: 1 + allowedAutorotateToPortrait: 1 + allowedAutorotateToPortraitUpsideDown: 1 + allowedAutorotateToLandscapeRight: 1 + allowedAutorotateToLandscapeLeft: 1 + useOSAutorotation: 1 + use32BitDisplayBuffer: 1 + preserveFramebufferAlpha: 0 + disableDepthAndStencilBuffers: 0 + androidStartInFullscreen: 1 + androidRenderOutsideSafeArea: 1 + androidUseSwappy: 1 + androidBlitType: 0 + androidResizableWindow: 0 + androidDefaultWindowWidth: 1920 + androidDefaultWindowHeight: 1080 + androidMinimumWindowWidth: 400 + androidMinimumWindowHeight: 300 + androidFullscreenMode: 1 + defaultIsNativeResolution: 1 + macRetinaSupport: 1 + runInBackground: 1 + captureSingleScreen: 0 + muteOtherAudioSources: 0 + Prepare IOS For Recording: 0 + Force IOS Speakers When Recording: 0 + deferSystemGesturesMode: 0 + hideHomeButton: 0 + submitAnalytics: 1 + usePlayerLog: 1 + bakeCollisionMeshes: 0 + forceSingleInstance: 0 + useFlipModelSwapchain: 1 + resizableWindow: 0 + useMacAppStoreValidation: 0 + macAppStoreCategory: public.app-category.games + gpuSkinning: 1 + xboxPIXTextureCapture: 0 + xboxEnableAvatar: 0 + xboxEnableKinect: 0 + xboxEnableKinectAutoTracking: 0 + xboxEnableFitness: 0 + visibleInBackground: 1 + allowFullscreenSwitch: 1 + fullscreenMode: 1 + xboxSpeechDB: 0 + xboxEnableHeadOrientation: 0 + xboxEnableGuest: 0 + xboxEnablePIXSampling: 0 + metalFramebufferOnly: 0 + xboxOneResolution: 0 + xboxOneSResolution: 0 + xboxOneXResolution: 3 + xboxOneMonoLoggingLevel: 0 + xboxOneLoggingLevel: 1 + xboxOneDisableEsram: 0 + xboxOneEnableTypeOptimization: 0 + xboxOnePresentImmediateThreshold: 0 + switchQueueCommandMemory: 0 + switchQueueControlMemory: 16384 + switchQueueComputeMemory: 262144 + switchNVNShaderPoolsGranularity: 33554432 + switchNVNDefaultPoolsGranularity: 16777216 + switchNVNOtherPoolsGranularity: 16777216 + switchNVNMaxPublicTextureIDCount: 0 + switchNVNMaxPublicSamplerIDCount: 0 + stadiaPresentMode: 0 + stadiaTargetFramerate: 0 + vulkanNumSwapchainBuffers: 3 + vulkanEnableSetSRGBWrite: 0 + vulkanEnablePreTransform: 1 + vulkanEnableLateAcquireNextImage: 0 + vulkanEnableCommandBufferRecycling: 1 + m_SupportedAspectRatios: + 4:3: 1 + 5:4: 1 + 16:10: 1 + 16:9: 1 + Others: 1 + bundleVersion: 0.1 + preloadedAssets: [] + metroInputSource: 0 + wsaTransparentSwapchain: 0 + m_HolographicPauseOnTrackingLoss: 1 + xboxOneDisableKinectGpuReservation: 1 + xboxOneEnable7thCore: 1 + vrSettings: + enable360StereoCapture: 0 + isWsaHolographicRemotingEnabled: 0 + enableFrameTimingStats: 0 + enableOpenGLProfilerGPURecorders: 1 + useHDRDisplay: 0 + D3DHDRBitDepth: 0 + m_ColorGamuts: 00000000 + targetPixelDensity: 30 + resolutionScalingMode: 0 + resetResolutionOnWindowResize: 0 + androidSupportedAspectRatio: 1 + androidMaxAspectRatio: 2.1 + applicationIdentifier: {} + buildNumber: + Standalone: 0 + iPhone: 0 + tvOS: 0 + overrideDefaultApplicationIdentifier: 0 + AndroidBundleVersionCode: 1 + AndroidMinSdkVersion: 22 + AndroidTargetSdkVersion: 0 + AndroidPreferredInstallLocation: 1 + aotOptions: + stripEngineCode: 1 + iPhoneStrippingLevel: 0 + iPhoneScriptCallOptimization: 0 + ForceInternetPermission: 0 + ForceSDCardPermission: 0 + CreateWallpaper: 0 + APKExpansionFiles: 0 + keepLoadedShadersAlive: 0 + StripUnusedMeshComponents: 1 + VertexChannelCompressionMask: 4054 + iPhoneSdkVersion: 988 + iOSTargetOSVersionString: 11.0 + tvOSSdkVersion: 0 + tvOSRequireExtendedGameController: 0 + tvOSTargetOSVersionString: 11.0 + uIPrerenderedIcon: 0 + uIRequiresPersistentWiFi: 0 + uIRequiresFullScreen: 1 + uIStatusBarHidden: 1 + uIExitOnSuspend: 0 + uIStatusBarStyle: 0 + appleTVSplashScreen: {fileID: 0} + appleTVSplashScreen2x: {fileID: 0} + tvOSSmallIconLayers: [] + tvOSSmallIconLayers2x: [] + tvOSLargeIconLayers: [] + tvOSLargeIconLayers2x: [] + tvOSTopShelfImageLayers: [] + tvOSTopShelfImageLayers2x: [] + tvOSTopShelfImageWideLayers: [] + tvOSTopShelfImageWideLayers2x: [] + iOSLaunchScreenType: 0 + iOSLaunchScreenPortrait: {fileID: 0} + iOSLaunchScreenLandscape: {fileID: 0} + iOSLaunchScreenBackgroundColor: + serializedVersion: 2 + rgba: 0 + iOSLaunchScreenFillPct: 100 + iOSLaunchScreenSize: 100 + iOSLaunchScreenCustomXibPath: + iOSLaunchScreeniPadType: 0 + iOSLaunchScreeniPadImage: {fileID: 0} + iOSLaunchScreeniPadBackgroundColor: + serializedVersion: 2 + rgba: 0 + iOSLaunchScreeniPadFillPct: 100 + iOSLaunchScreeniPadSize: 100 + iOSLaunchScreeniPadCustomXibPath: + iOSLaunchScreenCustomStoryboardPath: + iOSLaunchScreeniPadCustomStoryboardPath: + iOSDeviceRequirements: [] + iOSURLSchemes: [] + macOSURLSchemes: [] + iOSBackgroundModes: 0 + iOSMetalForceHardShadows: 0 + metalEditorSupport: 1 + metalAPIValidation: 1 + iOSRenderExtraFrameOnPause: 0 + iosCopyPluginsCodeInsteadOfSymlink: 0 + appleDeveloperTeamID: + iOSManualSigningProvisioningProfileID: + tvOSManualSigningProvisioningProfileID: + iOSManualSigningProvisioningProfileType: 0 + tvOSManualSigningProvisioningProfileType: 0 + appleEnableAutomaticSigning: 0 + iOSRequireARKit: 0 + iOSAutomaticallyDetectAndAddCapabilities: 1 + appleEnableProMotion: 0 + shaderPrecisionModel: 0 + clonedFromGUID: c0afd0d1d80e3634a9dac47e8a0426ea + templatePackageId: com.unity.template.3d@8.1.0 + templateDefaultScene: Assets/Scenes/SampleScene.unity + useCustomMainManifest: 0 + useCustomLauncherManifest: 0 + useCustomMainGradleTemplate: 0 + useCustomLauncherGradleManifest: 0 + useCustomBaseGradleTemplate: 0 + useCustomGradlePropertiesTemplate: 0 + useCustomProguardFile: 0 + AndroidTargetArchitectures: 1 + AndroidTargetDevices: 0 + AndroidSplashScreenScale: 0 + androidSplashScreen: {fileID: 0} + AndroidKeystoreName: + AndroidKeyaliasName: + AndroidBuildApkPerCpuArchitecture: 0 + AndroidTVCompatibility: 0 + AndroidIsGame: 1 + AndroidEnableTango: 0 + androidEnableBanner: 1 + androidUseLowAccuracyLocation: 0 + androidUseCustomKeystore: 0 + m_AndroidBanners: + - width: 320 + height: 180 + banner: {fileID: 0} + androidGamepadSupportLevel: 0 + chromeosInputEmulation: 1 + AndroidMinifyWithR8: 0 + AndroidMinifyRelease: 0 + AndroidMinifyDebug: 0 + AndroidValidateAppBundleSize: 1 + AndroidAppBundleSizeToValidate: 150 + m_BuildTargetIcons: [] + m_BuildTargetPlatformIcons: [] + m_BuildTargetBatching: + - m_BuildTarget: Standalone + m_StaticBatching: 1 + m_DynamicBatching: 0 + - m_BuildTarget: tvOS + m_StaticBatching: 1 + m_DynamicBatching: 0 + - m_BuildTarget: Android + m_StaticBatching: 1 + m_DynamicBatching: 0 + - m_BuildTarget: iPhone + m_StaticBatching: 1 + m_DynamicBatching: 0 + - m_BuildTarget: WebGL + m_StaticBatching: 0 + m_DynamicBatching: 0 + m_BuildTargetGraphicsJobs: + - m_BuildTarget: MacStandaloneSupport + m_GraphicsJobs: 0 + - m_BuildTarget: Switch + m_GraphicsJobs: 1 + - m_BuildTarget: MetroSupport + m_GraphicsJobs: 1 + - m_BuildTarget: AppleTVSupport + m_GraphicsJobs: 0 + - m_BuildTarget: BJMSupport + m_GraphicsJobs: 1 + - m_BuildTarget: LinuxStandaloneSupport + m_GraphicsJobs: 1 + - m_BuildTarget: PS4Player + m_GraphicsJobs: 1 + - m_BuildTarget: iOSSupport + m_GraphicsJobs: 0 + - m_BuildTarget: WindowsStandaloneSupport + m_GraphicsJobs: 1 + - m_BuildTarget: XboxOnePlayer + m_GraphicsJobs: 1 + - m_BuildTarget: LuminSupport + m_GraphicsJobs: 0 + - m_BuildTarget: AndroidPlayer + m_GraphicsJobs: 0 + - m_BuildTarget: WebGLSupport + m_GraphicsJobs: 0 + m_BuildTargetGraphicsJobMode: + - m_BuildTarget: PS4Player + m_GraphicsJobMode: 0 + - m_BuildTarget: XboxOnePlayer + m_GraphicsJobMode: 0 + m_BuildTargetGraphicsAPIs: + - m_BuildTarget: AndroidPlayer + m_APIs: 150000000b000000 + m_Automatic: 1 + - m_BuildTarget: iOSSupport + m_APIs: 10000000 + m_Automatic: 1 + - m_BuildTarget: AppleTVSupport + m_APIs: 10000000 + m_Automatic: 1 + - m_BuildTarget: WebGLSupport + m_APIs: 0b000000 + m_Automatic: 1 + m_BuildTargetVRSettings: + - m_BuildTarget: Standalone + m_Enabled: 0 + m_Devices: + - Oculus + - OpenVR + openGLRequireES31: 0 + openGLRequireES31AEP: 0 + openGLRequireES32: 0 + m_TemplateCustomTags: {} + mobileMTRendering: + Android: 1 + iPhone: 1 + tvOS: 1 + m_BuildTargetGroupLightmapEncodingQuality: + - m_BuildTarget: Android + m_EncodingQuality: 1 + - m_BuildTarget: iPhone + m_EncodingQuality: 1 + - m_BuildTarget: tvOS + m_EncodingQuality: 1 + m_BuildTargetGroupLightmapSettings: [] + m_BuildTargetNormalMapEncoding: + - m_BuildTarget: Android + m_Encoding: 1 + - m_BuildTarget: iPhone + m_Encoding: 1 + - m_BuildTarget: tvOS + m_Encoding: 1 + m_BuildTargetDefaultTextureCompressionFormat: + - m_BuildTarget: Android + m_Format: 3 + playModeTestRunnerEnabled: 0 + runPlayModeTestAsEditModeTest: 0 + actionOnDotNetUnhandledException: 1 + enableInternalProfiler: 0 + logObjCUncaughtExceptions: 1 + enableCrashReportAPI: 0 + cameraUsageDescription: + locationUsageDescription: + microphoneUsageDescription: + bluetoothUsageDescription: + switchNMETAOverride: + switchNetLibKey: + switchSocketMemoryPoolSize: 6144 + switchSocketAllocatorPoolSize: 128 + switchSocketConcurrencyLimit: 14 + switchScreenResolutionBehavior: 2 + switchUseCPUProfiler: 0 + switchUseGOLDLinker: 0 + switchLTOSetting: 0 + switchApplicationID: 0x01004b9000490000 + switchNSODependencies: + switchTitleNames_0: + switchTitleNames_1: + switchTitleNames_2: + switchTitleNames_3: + switchTitleNames_4: + switchTitleNames_5: + switchTitleNames_6: + switchTitleNames_7: + switchTitleNames_8: + switchTitleNames_9: + switchTitleNames_10: + switchTitleNames_11: + switchTitleNames_12: + switchTitleNames_13: + switchTitleNames_14: + switchTitleNames_15: + switchPublisherNames_0: + switchPublisherNames_1: + switchPublisherNames_2: + switchPublisherNames_3: + switchPublisherNames_4: + switchPublisherNames_5: + switchPublisherNames_6: + switchPublisherNames_7: + switchPublisherNames_8: + switchPublisherNames_9: + switchPublisherNames_10: + switchPublisherNames_11: + switchPublisherNames_12: + switchPublisherNames_13: + switchPublisherNames_14: + switchPublisherNames_15: + switchIcons_0: {fileID: 0} + switchIcons_1: {fileID: 0} + switchIcons_2: {fileID: 0} + switchIcons_3: {fileID: 0} + switchIcons_4: {fileID: 0} + switchIcons_5: {fileID: 0} + switchIcons_6: {fileID: 0} + switchIcons_7: {fileID: 0} + switchIcons_8: {fileID: 0} + switchIcons_9: {fileID: 0} + switchIcons_10: {fileID: 0} + switchIcons_11: {fileID: 0} + switchIcons_12: {fileID: 0} + switchIcons_13: {fileID: 0} + switchIcons_14: {fileID: 0} + switchIcons_15: {fileID: 0} + switchSmallIcons_0: {fileID: 0} + switchSmallIcons_1: {fileID: 0} + switchSmallIcons_2: {fileID: 0} + switchSmallIcons_3: {fileID: 0} + switchSmallIcons_4: {fileID: 0} + switchSmallIcons_5: {fileID: 0} + switchSmallIcons_6: {fileID: 0} + switchSmallIcons_7: {fileID: 0} + switchSmallIcons_8: {fileID: 0} + switchSmallIcons_9: {fileID: 0} + switchSmallIcons_10: {fileID: 0} + switchSmallIcons_11: {fileID: 0} + switchSmallIcons_12: {fileID: 0} + switchSmallIcons_13: {fileID: 0} + switchSmallIcons_14: {fileID: 0} + switchSmallIcons_15: {fileID: 0} + switchManualHTML: + switchAccessibleURLs: + switchLegalInformation: + switchMainThreadStackSize: 1048576 + switchPresenceGroupId: + switchLogoHandling: 0 + switchReleaseVersion: 0 + switchDisplayVersion: 1.0.0 + switchStartupUserAccount: 0 + switchTouchScreenUsage: 0 + switchSupportedLanguagesMask: 0 + switchLogoType: 0 + switchApplicationErrorCodeCategory: + switchUserAccountSaveDataSize: 0 + switchUserAccountSaveDataJournalSize: 0 + switchApplicationAttribute: 0 + switchCardSpecSize: -1 + switchCardSpecClock: -1 + switchRatingsMask: 0 + switchRatingsInt_0: 0 + switchRatingsInt_1: 0 + switchRatingsInt_2: 0 + switchRatingsInt_3: 0 + switchRatingsInt_4: 0 + switchRatingsInt_5: 0 + switchRatingsInt_6: 0 + switchRatingsInt_7: 0 + switchRatingsInt_8: 0 + switchRatingsInt_9: 0 + switchRatingsInt_10: 0 + switchRatingsInt_11: 0 + switchRatingsInt_12: 0 + switchLocalCommunicationIds_0: + switchLocalCommunicationIds_1: + switchLocalCommunicationIds_2: + switchLocalCommunicationIds_3: + switchLocalCommunicationIds_4: + switchLocalCommunicationIds_5: + switchLocalCommunicationIds_6: + switchLocalCommunicationIds_7: + switchParentalControl: 0 + switchAllowsScreenshot: 1 + switchAllowsVideoCapturing: 1 + switchAllowsRuntimeAddOnContentInstall: 0 + switchDataLossConfirmation: 0 + switchUserAccountLockEnabled: 0 + switchSystemResourceMemory: 16777216 + switchSupportedNpadStyles: 22 + switchNativeFsCacheSize: 32 + switchIsHoldTypeHorizontal: 0 + switchSupportedNpadCount: 8 + switchSocketConfigEnabled: 0 + switchTcpInitialSendBufferSize: 32 + switchTcpInitialReceiveBufferSize: 64 + switchTcpAutoSendBufferSizeMax: 256 + switchTcpAutoReceiveBufferSizeMax: 256 + switchUdpSendBufferSize: 9 + switchUdpReceiveBufferSize: 42 + switchSocketBufferEfficiency: 4 + switchSocketInitializeEnabled: 1 + switchNetworkInterfaceManagerInitializeEnabled: 1 + switchPlayerConnectionEnabled: 1 + switchUseNewStyleFilepaths: 0 + switchUseMicroSleepForYield: 1 + switchEnableRamDiskSupport: 0 + switchMicroSleepForYieldTime: 25 + switchRamDiskSpaceSize: 12 + ps4NPAgeRating: 12 + ps4NPTitleSecret: + ps4NPTrophyPackPath: + ps4ParentalLevel: 11 + ps4ContentID: ED1633-NPXX51362_00-0000000000000000 + ps4Category: 0 + ps4MasterVersion: 01.00 + ps4AppVersion: 01.00 + ps4AppType: 0 + ps4ParamSfxPath: + ps4VideoOutPixelFormat: 0 + ps4VideoOutInitialWidth: 1920 + ps4VideoOutBaseModeInitialWidth: 1920 + ps4VideoOutReprojectionRate: 60 + ps4PronunciationXMLPath: + ps4PronunciationSIGPath: + ps4BackgroundImagePath: + ps4StartupImagePath: + ps4StartupImagesFolder: + ps4IconImagesFolder: + ps4SaveDataImagePath: + ps4SdkOverride: + ps4BGMPath: + ps4ShareFilePath: + ps4ShareOverlayImagePath: + ps4PrivacyGuardImagePath: + ps4ExtraSceSysFile: + ps4NPtitleDatPath: + ps4RemotePlayKeyAssignment: -1 + ps4RemotePlayKeyMappingDir: + ps4PlayTogetherPlayerCount: 0 + ps4EnterButtonAssignment: 1 + ps4ApplicationParam1: 0 + ps4ApplicationParam2: 0 + ps4ApplicationParam3: 0 + ps4ApplicationParam4: 0 + ps4DownloadDataSize: 0 + ps4GarlicHeapSize: 2048 + ps4ProGarlicHeapSize: 2560 + playerPrefsMaxSize: 32768 + ps4Passcode: frAQBc8Wsa1xVPfvJcrgRYwTiizs2trQ + ps4pnSessions: 1 + ps4pnPresence: 1 + ps4pnFriends: 1 + ps4pnGameCustomData: 1 + playerPrefsSupport: 0 + enableApplicationExit: 0 + resetTempFolder: 1 + restrictedAudioUsageRights: 0 + ps4UseResolutionFallback: 0 + ps4ReprojectionSupport: 0 + ps4UseAudio3dBackend: 0 + ps4UseLowGarlicFragmentationMode: 1 + ps4SocialScreenEnabled: 0 + ps4ScriptOptimizationLevel: 0 + ps4Audio3dVirtualSpeakerCount: 14 + ps4attribCpuUsage: 0 + ps4PatchPkgPath: + ps4PatchLatestPkgPath: + ps4PatchChangeinfoPath: + ps4PatchDayOne: 0 + ps4attribUserManagement: 0 + ps4attribMoveSupport: 0 + ps4attrib3DSupport: 0 + ps4attribShareSupport: 0 + ps4attribExclusiveVR: 0 + ps4disableAutoHideSplash: 0 + ps4videoRecordingFeaturesUsed: 0 + ps4contentSearchFeaturesUsed: 0 + ps4CompatibilityPS5: 0 + ps4AllowPS5Detection: 0 + ps4GPU800MHz: 1 + ps4attribEyeToEyeDistanceSettingVR: 0 + ps4IncludedModules: + - libc.prx + - libSceAudioLatencyEstimation.prx + - libSceFace.prx + - libSceFaceTracker.prx + - libSceFios2.prx + - libSceHand.prx + - libSceHandTracker.prx + - libSceHeadTracker.prx + - libSceJobManager.prx + - libSceNpCppWebApi.prx + - libSceNpToolkit2.prx + - libSceS3DConversion.prx + ps4attribVROutputEnabled: 0 + monoEnv: + splashScreenBackgroundSourceLandscape: {fileID: 0} + splashScreenBackgroundSourcePortrait: {fileID: 0} + blurSplashScreenBackground: 1 + spritePackerPolicy: + webGLMemorySize: 16 + webGLExceptionSupport: 1 + webGLNameFilesAsHashes: 0 + webGLDataCaching: 1 + webGLDebugSymbols: 0 + webGLEmscriptenArgs: + webGLModulesDirectory: + webGLTemplate: APPLICATION:Default + webGLAnalyzeBuildSize: 0 + webGLUseEmbeddedResources: 0 + webGLCompressionFormat: 1 + webGLWasmArithmeticExceptions: 0 + webGLLinkerTarget: 1 + webGLThreadsSupport: 0 + webGLDecompressionFallback: 0 + scriptingDefineSymbols: {} + additionalCompilerArguments: {} + platformArchitecture: {} + scriptingBackend: {} + il2cppCompilerConfiguration: {} + managedStrippingLevel: {} + incrementalIl2cppBuild: {} + suppressCommonWarnings: 1 + allowUnsafeCode: 0 + useDeterministicCompilation: 1 + enableRoslynAnalyzers: 1 + additionalIl2CppArgs: + scriptingRuntimeVersion: 1 + gcIncremental: 1 + assemblyVersionValidation: 1 + gcWBarrierValidation: 0 + apiCompatibilityLevelPerPlatform: {} + m_RenderingPath: 1 + m_MobileRenderingPath: 1 + metroPackageName: Template_3D + metroPackageVersion: + metroCertificatePath: + metroCertificatePassword: + metroCertificateSubject: + metroCertificateIssuer: + metroCertificateNotAfter: 0000000000000000 + metroApplicationDescription: Template_3D + wsaImages: {} + metroTileShortName: + metroTileShowName: 0 + metroMediumTileShowName: 0 + metroLargeTileShowName: 0 + metroWideTileShowName: 0 + metroSupportStreamingInstall: 0 + metroLastRequiredScene: 0 + metroDefaultTileSize: 1 + metroTileForegroundText: 2 + metroTileBackgroundColor: {r: 0.13333334, g: 0.17254902, b: 0.21568628, a: 0} + metroSplashScreenBackgroundColor: {r: 0.12941177, g: 0.17254902, b: 0.21568628, a: 1} + metroSplashScreenUseBackgroundColor: 0 + platformCapabilities: {} + metroTargetDeviceFamilies: {} + metroFTAName: + metroFTAFileTypes: [] + metroProtocolName: + vcxProjDefaultLanguage: + XboxOneProductId: + XboxOneUpdateKey: + XboxOneSandboxId: + XboxOneContentId: + XboxOneTitleId: + XboxOneSCId: + XboxOneGameOsOverridePath: + XboxOnePackagingOverridePath: + XboxOneAppManifestOverridePath: + XboxOneVersion: 1.0.0.0 + XboxOnePackageEncryption: 0 + XboxOnePackageUpdateGranularity: 2 + XboxOneDescription: + XboxOneLanguage: + - enus + XboxOneCapability: [] + XboxOneGameRating: {} + XboxOneIsContentPackage: 0 + XboxOneEnhancedXboxCompatibilityMode: 0 + XboxOneEnableGPUVariability: 1 + XboxOneSockets: {} + XboxOneSplashScreen: {fileID: 0} + XboxOneAllowedProductIds: [] + XboxOnePersistentLocalStorageSize: 0 + XboxOneXTitleMemory: 8 + XboxOneOverrideIdentityName: + XboxOneOverrideIdentityPublisher: + vrEditorSettings: {} + cloudServicesEnabled: + UNet: 1 + luminIcon: + m_Name: + m_ModelFolderPath: + m_PortalFolderPath: + luminCert: + m_CertPath: + m_SignPackage: 1 + luminIsChannelApp: 0 + luminVersion: + m_VersionCode: 1 + m_VersionName: + apiCompatibilityLevel: 6 + activeInputHandler: 0 + cloudProjectId: + framebufferDepthMemorylessMode: 0 + qualitySettingsNames: [] + projectName: + organizationId: + cloudEnabled: 0 + legacyClampBlendShapeWeights: 0 + playerDataPath: + forceSRGBBlit: 1 + virtualTexturingSupportEnabled: 0 diff --git a/ProjectSettings/ProjectVersion.txt b/ProjectSettings/ProjectVersion.txt new file mode 100644 index 0000000..bdb3ba5 --- /dev/null +++ b/ProjectSettings/ProjectVersion.txt @@ -0,0 +1,2 @@ +m_EditorVersion: 2021.3.5f1 +m_EditorVersionWithRevision: 2021.3.5f1 (40eb3a945986) diff --git a/ProjectSettings/QualitySettings.asset b/ProjectSettings/QualitySettings.asset new file mode 100644 index 0000000..7b7658d --- /dev/null +++ b/ProjectSettings/QualitySettings.asset @@ -0,0 +1,232 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!47 &1 +QualitySettings: + m_ObjectHideFlags: 0 + serializedVersion: 5 + m_CurrentQuality: 5 + m_QualitySettings: + - serializedVersion: 2 + name: Very Low + pixelLightCount: 0 + shadows: 0 + shadowResolution: 0 + shadowProjection: 1 + shadowCascades: 1 + shadowDistance: 15 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 0 + blendWeights: 1 + textureQuality: 1 + anisotropicTextures: 0 + antiAliasing: 0 + softParticles: 0 + softVegetation: 0 + realtimeReflectionProbes: 0 + billboardsFaceCameraPosition: 0 + vSyncCount: 0 + lodBias: 0.3 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 4 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + - serializedVersion: 2 + name: Low + pixelLightCount: 0 + shadows: 0 + shadowResolution: 0 + shadowProjection: 1 + shadowCascades: 1 + shadowDistance: 20 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 0 + blendWeights: 2 + textureQuality: 0 + anisotropicTextures: 0 + antiAliasing: 0 + softParticles: 0 + softVegetation: 0 + realtimeReflectionProbes: 0 + billboardsFaceCameraPosition: 0 + vSyncCount: 0 + lodBias: 0.4 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 16 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + - serializedVersion: 2 + name: Medium + pixelLightCount: 1 + shadows: 1 + shadowResolution: 0 + shadowProjection: 1 + shadowCascades: 1 + shadowDistance: 20 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 0 + blendWeights: 2 + textureQuality: 0 + anisotropicTextures: 1 + antiAliasing: 0 + softParticles: 0 + softVegetation: 0 + realtimeReflectionProbes: 0 + billboardsFaceCameraPosition: 0 + vSyncCount: 1 + lodBias: 0.7 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 64 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + - serializedVersion: 2 + name: High + pixelLightCount: 2 + shadows: 2 + shadowResolution: 1 + shadowProjection: 1 + shadowCascades: 2 + shadowDistance: 40 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 1 + blendWeights: 2 + textureQuality: 0 + anisotropicTextures: 1 + antiAliasing: 0 + softParticles: 0 + softVegetation: 1 + realtimeReflectionProbes: 1 + billboardsFaceCameraPosition: 1 + vSyncCount: 1 + lodBias: 1 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 256 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + - serializedVersion: 2 + name: Very High + pixelLightCount: 3 + shadows: 2 + shadowResolution: 2 + shadowProjection: 1 + shadowCascades: 2 + shadowDistance: 70 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 1 + blendWeights: 4 + textureQuality: 0 + anisotropicTextures: 2 + antiAliasing: 2 + softParticles: 1 + softVegetation: 1 + realtimeReflectionProbes: 1 + billboardsFaceCameraPosition: 1 + vSyncCount: 1 + lodBias: 1.5 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 1024 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + - serializedVersion: 2 + name: Ultra + pixelLightCount: 4 + shadows: 2 + shadowResolution: 2 + shadowProjection: 1 + shadowCascades: 4 + shadowDistance: 150 + shadowNearPlaneOffset: 3 + shadowCascade2Split: 0.33333334 + shadowCascade4Split: {x: 0.06666667, y: 0.2, z: 0.46666667} + shadowmaskMode: 1 + blendWeights: 4 + textureQuality: 0 + anisotropicTextures: 2 + antiAliasing: 2 + softParticles: 1 + softVegetation: 1 + realtimeReflectionProbes: 1 + billboardsFaceCameraPosition: 1 + vSyncCount: 1 + lodBias: 2 + maximumLODLevel: 0 + streamingMipmapsActive: 0 + streamingMipmapsAddAllCameras: 1 + streamingMipmapsMemoryBudget: 512 + streamingMipmapsRenderersPerFrame: 512 + streamingMipmapsMaxLevelReduction: 2 + streamingMipmapsMaxFileIORequests: 1024 + particleRaycastBudget: 4096 + asyncUploadTimeSlice: 2 + asyncUploadBufferSize: 16 + asyncUploadPersistentBuffer: 1 + resolutionScalingFixedDPIFactor: 1 + excludedTargetPlatforms: [] + m_PerPlatformDefaultQuality: + Android: 2 + Lumin: 5 + Nintendo 3DS: 5 + Nintendo Switch: 5 + PS4: 5 + PSP2: 2 + Stadia: 5 + Standalone: 5 + WebGL: 3 + Windows Store Apps: 5 + XboxOne: 5 + iPhone: 2 + tvOS: 2 diff --git a/ProjectSettings/ScarlettGame.config b/ProjectSettings/ScarlettGame.config new file mode 100644 index 0000000..4d6075c --- /dev/null +++ b/ProjectSettings/ScarlettGame.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ProjectSettings/TagManager.asset b/ProjectSettings/TagManager.asset new file mode 100644 index 0000000..1c92a78 --- /dev/null +++ b/ProjectSettings/TagManager.asset @@ -0,0 +1,43 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!78 &1 +TagManager: + serializedVersion: 2 + tags: [] + layers: + - Default + - TransparentFX + - Ignore Raycast + - + - Water + - UI + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + m_SortingLayers: + - name: Default + uniqueID: 0 + locked: 0 diff --git a/ProjectSettings/TimeManager.asset b/ProjectSettings/TimeManager.asset new file mode 100644 index 0000000..558a017 --- /dev/null +++ b/ProjectSettings/TimeManager.asset @@ -0,0 +1,9 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!5 &1 +TimeManager: + m_ObjectHideFlags: 0 + Fixed Timestep: 0.02 + Maximum Allowed Timestep: 0.33333334 + m_TimeScale: 1 + Maximum Particle Timestep: 0.03 diff --git a/ProjectSettings/UnityConnectSettings.asset b/ProjectSettings/UnityConnectSettings.asset new file mode 100644 index 0000000..6125b30 --- /dev/null +++ b/ProjectSettings/UnityConnectSettings.asset @@ -0,0 +1,35 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!310 &1 +UnityConnectSettings: + m_ObjectHideFlags: 0 + serializedVersion: 1 + m_Enabled: 0 + m_TestMode: 0 + m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events + m_EventUrl: https://cdp.cloud.unity3d.com/v1/events + m_ConfigUrl: https://config.uca.cloud.unity3d.com + m_DashboardUrl: https://dashboard.unity3d.com + m_TestInitMode: 0 + CrashReportingSettings: + m_EventUrl: https://perf-events.cloud.unity3d.com + m_Enabled: 0 + m_LogBufferSize: 10 + m_CaptureEditorExceptions: 1 + UnityPurchasingSettings: + m_Enabled: 0 + m_TestMode: 0 + UnityAnalyticsSettings: + m_Enabled: 0 + m_TestMode: 0 + m_InitializeOnStartup: 1 + UnityAdsSettings: + m_Enabled: 0 + m_InitializeOnStartup: 1 + m_TestMode: 0 + m_IosGameId: + m_AndroidGameId: + m_GameIds: {} + m_GameId: + PerformanceReportingSettings: + m_Enabled: 0 diff --git a/ProjectSettings/VFXManager.asset b/ProjectSettings/VFXManager.asset new file mode 100644 index 0000000..3a95c98 --- /dev/null +++ b/ProjectSettings/VFXManager.asset @@ -0,0 +1,12 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!937362698 &1 +VFXManager: + m_ObjectHideFlags: 0 + m_IndirectShader: {fileID: 0} + m_CopyBufferShader: {fileID: 0} + m_SortShader: {fileID: 0} + m_StripUpdateShader: {fileID: 0} + m_RenderPipeSettingsPath: + m_FixedTimeStep: 0.016666668 + m_MaxDeltaTime: 0.05 diff --git a/ProjectSettings/VersionControlSettings.asset b/ProjectSettings/VersionControlSettings.asset new file mode 100644 index 0000000..dca2881 --- /dev/null +++ b/ProjectSettings/VersionControlSettings.asset @@ -0,0 +1,8 @@ +%YAML 1.1 +%TAG !u! tag:unity3d.com,2011: +--- !u!890905787 &1 +VersionControlSettings: + m_ObjectHideFlags: 0 + m_Mode: Visible Meta Files + m_CollabEditorSettings: + inProgressEnabled: 1 diff --git a/ProjectSettings/XRSettings.asset b/ProjectSettings/XRSettings.asset new file mode 100644 index 0000000..482590c --- /dev/null +++ b/ProjectSettings/XRSettings.asset @@ -0,0 +1,10 @@ +{ + "m_SettingKeys": [ + "VR Device Disabled", + "VR Device User Alert" + ], + "m_SettingValues": [ + "False", + "False" + ] +} \ No newline at end of file diff --git a/ProjectSettings/XboxOneGame.config b/ProjectSettings/XboxOneGame.config new file mode 100644 index 0000000..4d6075c --- /dev/null +++ b/ProjectSettings/XboxOneGame.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ProjectSettings/boot.config b/ProjectSettings/boot.config new file mode 100644 index 0000000..e69de29