From 5497a6dfe6c2617964ef41b4c33170fbdf3903ce Mon Sep 17 00:00:00 2001
From: Nico de Poel <ndepoel@gmail.com>
Date: Tue, 4 Jun 2024 18:31:43 +0200
Subject: [PATCH] Implemented the rest of the FSR3 Upscaler plugin using the
 existing open source code and it uhh.... just works

---
 .../RenderPass/Upscalers/FSR3.meta            |    8 +
 .../Upscalers/FSR3/Fsr3ShaderIDs.cs           |   80 +
 .../Upscalers/FSR3/Fsr3ShaderIDs.cs.meta      |    3 +
 .../RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs |  333 ++
 .../Upscalers/FSR3/Fsr3Upscaler.cs.meta       |    3 +
 .../Upscalers/FSR3/Fsr3UpscalerAssets.cs      |  151 +
 .../Upscalers/FSR3/Fsr3UpscalerAssets.cs.meta |   11 +
 .../Upscalers/FSR3/Fsr3UpscalerCallbacks.cs   |   81 +
 .../FSR3/Fsr3UpscalerCallbacks.cs.meta        |   11 +
 .../Upscalers/FSR3/Fsr3UpscalerContext.cs     |  610 ++++
 .../FSR3/Fsr3UpscalerContext.cs.meta          |    3 +
 .../Upscalers/FSR3/Fsr3UpscalerPass.cs        |  372 ++
 .../Upscalers/FSR3/Fsr3UpscalerPass.cs.meta   |    3 +
 .../Upscalers/FSR3/Fsr3UpscalerResources.cs   |  251 ++
 .../FSR3/Fsr3UpscalerResources.cs.meta        |    3 +
 .../RenderPass/Upscalers/FSR3/Resources.meta  |    8 +
 .../FSR3/Resources/Fsr3UpscalerAssets.asset   |   23 +
 .../Resources/Fsr3UpscalerAssets.asset.meta   |    8 +
 .../ffx_fsr3upscaler_accumulate_pass.compute  |   41 +
 ..._fsr3upscaler_accumulate_pass.compute.meta |    8 +
 ...fsr3upscaler_autogen_reactive_pass.compute |   32 +
 ...pscaler_autogen_reactive_pass.compute.meta |    8 +
 ...ler_compute_luminance_pyramid_pass.compute |   42 +
 ...ompute_luminance_pyramid_pass.compute.meta |    8 +
 .../ffx_fsr3upscaler_depth_clip_pass.compute  |   32 +
 ..._fsr3upscaler_depth_clip_pass.compute.meta |    8 +
 .../FSR3/ffx_fsr3upscaler_lock_pass.compute   |   30 +
 .../ffx_fsr3upscaler_lock_pass.compute.meta   |    8 +
 .../FSR3/ffx_fsr3upscaler_rcas_pass.compute   |   29 +
 .../ffx_fsr3upscaler_rcas_pass.compute.meta   |    8 +
 ...er_reconstruct_previous_depth_pass.compute |   33 +
 ...construct_previous_depth_pass.compute.meta |    8 +
 .../ffx_fsr3upscaler_tcr_autogen_pass.compute |   32 +
 ...fsr3upscaler_tcr_autogen_pass.compute.meta |    8 +
 .../FSR3/ffx_fsr3upscaler_unity_common.cginc  |   82 +
 .../ffx_fsr3upscaler_unity_common.cginc.meta  |    7 +
 .../RenderPass/Upscalers/FSR3/shaders.meta    |    8 +
 .../ffx_fsr3upscaler_accumulate_pass.hlsl     |   79 +
 ...ffx_fsr3upscaler_accumulate_pass.hlsl.meta |    7 +
 ...fx_fsr3upscaler_autogen_reactive_pass.hlsl |   77 +
 ...r3upscaler_autogen_reactive_pass.hlsl.meta |    7 +
 ...scaler_compute_luminance_pyramid_pass.hlsl |   55 +
 ...r_compute_luminance_pyramid_pass.hlsl.meta |    7 +
 .../ffx_fsr3upscaler_depth_clip_pass.hlsl     |   67 +
 ...ffx_fsr3upscaler_depth_clip_pass.hlsl.meta |    7 +
 .../shaders/ffx_fsr3upscaler_lock_pass.hlsl   |   56 +
 .../ffx_fsr3upscaler_lock_pass.hlsl.meta      |    7 +
 .../shaders/ffx_fsr3upscaler_rcas_pass.hlsl   |   53 +
 .../ffx_fsr3upscaler_rcas_pass.hlsl.meta      |    7 +
 ...caler_reconstruct_previous_depth_pass.hlsl |   64 +
 ..._reconstruct_previous_depth_pass.hlsl.meta |    7 +
 .../ffx_fsr3upscaler_tcr_autogen_pass.hlsl    |   90 +
 ...fx_fsr3upscaler_tcr_autogen_pass.hlsl.meta |    7 +
 .../Upscalers/FSR3/shaders/fsr3upscaler.meta  |    8 +
 .../shaders/fsr3upscaler/ffx_common_types.h   |  616 ++++
 .../fsr3upscaler/ffx_common_types.h.meta      |   27 +
 .../FSR3/shaders/fsr3upscaler/ffx_core.h      |   80 +
 .../FSR3/shaders/fsr3upscaler/ffx_core.h.meta |   27 +
 .../FSR3/shaders/fsr3upscaler/ffx_core_cpu.h  |  338 ++
 .../shaders/fsr3upscaler/ffx_core_cpu.h.meta  |   27 +
 .../fsr3upscaler/ffx_core_gpu_common.h        | 2784 +++++++++++++++
 .../fsr3upscaler/ffx_core_gpu_common.h.meta   |   27 +
 .../fsr3upscaler/ffx_core_gpu_common_half.h   | 2979 +++++++++++++++++
 .../ffx_core_gpu_common_half.h.meta           |   27 +
 .../FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h | 1651 +++++++++
 .../shaders/fsr3upscaler/ffx_core_hlsl.h.meta |   27 +
 .../fsr3upscaler/ffx_core_portability.h       |   51 +
 .../fsr3upscaler/ffx_core_portability.h.meta  |   27 +
 .../ffx_fsr3upscaler_accumulate.h             |  288 ++
 .../ffx_fsr3upscaler_accumulate.h.meta        |   27 +
 .../ffx_fsr3upscaler_callbacks_hlsl.h         |  928 +++++
 .../ffx_fsr3upscaler_callbacks_hlsl.h.meta    |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_common.h    |  566 ++++
 .../ffx_fsr3upscaler_common.h.meta            |   27 +
 ...x_fsr3upscaler_compute_luminance_pyramid.h |  176 +
 ...3upscaler_compute_luminance_pyramid.h.meta |   27 +
 .../ffx_fsr3upscaler_depth_clip.h             |  259 ++
 .../ffx_fsr3upscaler_depth_clip.h.meta        |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_lock.h      |  116 +
 .../fsr3upscaler/ffx_fsr3upscaler_lock.h.meta |   27 +
 ...ffx_fsr3upscaler_postprocess_lock_status.h |  107 +
 ...sr3upscaler_postprocess_lock_status.h.meta |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_rcas.h      |   67 +
 .../fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta |   27 +
 ...ruct_dilated_velocity_and_previous_depth.h |  146 +
 ...dilated_velocity_and_previous_depth.h.meta |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_reproject.h |  137 +
 .../ffx_fsr3upscaler_reproject.h.meta         |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_resources.h |  104 +
 .../ffx_fsr3upscaler_resources.h.meta         |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_sample.h    |  606 ++++
 .../ffx_fsr3upscaler_sample.h.meta            |   27 +
 .../ffx_fsr3upscaler_tcr_autogen.h            |  250 ++
 .../ffx_fsr3upscaler_tcr_autogen.h.meta       |   27 +
 .../fsr3upscaler/ffx_fsr3upscaler_upsample.h  |  195 ++
 .../ffx_fsr3upscaler_upsample.h.meta          |   27 +
 .../FSR3/shaders/fsr3upscaler/fsr1.meta       |    8 +
 .../FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h | 1252 +++++++
 .../shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta |   27 +
 .../FSR3/shaders/fsr3upscaler/spd.meta        |    8 +
 .../FSR3/shaders/fsr3upscaler/spd/ffx_spd.h   | 1009 ++++++
 .../shaders/fsr3upscaler/spd/ffx_spd.h.meta   |   27 +
 .../RenderPass/Upscalers/FSR3Upscaler.cs      |   85 +-
 .../RenderPass/Upscalers/UpscalerPlugin.cs    |    5 +-
 104 files changed, 18416 insertions(+), 15 deletions(-)
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd.meta
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h
 create mode 100644 com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta

diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3.meta
new file mode 100644
index 00000000..e12cfa8a
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: bfeca308812376e4a8e5e49e0d96c5c6
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs
new file mode 100644
index 00000000..3a288431
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs
@@ -0,0 +1,80 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using UnityEngine;
+
+namespace FidelityFX
+{
+    internal static class Fsr3ShaderIDs // Shader property IDs for the FSR3 Upscaler bindings; each ID is looked up once via Shader.PropertyToID when the static fields are initialised.
+    {   // NOTE(review): the string names presumably have to match the resource declarations in the FSR3 shader sources exactly - confirm before renaming any of them.
+        // Shader resource views (SRVs), i.e. read-only bindings. Every name in this group uses the "r_" prefix.
+        internal static readonly int SrvInputColor = Shader.PropertyToID("r_input_color_jittered");
+        internal static readonly int SrvOpaqueOnly = Shader.PropertyToID("r_input_opaque_only");
+        internal static readonly int SrvInputMotionVectors = Shader.PropertyToID("r_input_motion_vectors");
+        internal static readonly int SrvInputDepth = Shader.PropertyToID("r_input_depth");
+        internal static readonly int SrvInputExposure = Shader.PropertyToID("r_input_exposure");
+        internal static readonly int SrvAutoExposure = Shader.PropertyToID("r_auto_exposure");
+        internal static readonly int SrvReactiveMask = Shader.PropertyToID("r_reactive_mask");
+        internal static readonly int SrvTransparencyAndCompositionMask = Shader.PropertyToID("r_transparency_and_composition_mask");
+        internal static readonly int SrvReconstructedPrevNearestDepth = Shader.PropertyToID("r_reconstructed_previous_nearest_depth");
+        internal static readonly int SrvDilatedMotionVectors = Shader.PropertyToID("r_dilated_motion_vectors");
+        internal static readonly int SrvPrevDilatedMotionVectors = Shader.PropertyToID("r_previous_dilated_motion_vectors");
+        internal static readonly int SrvDilatedDepth = Shader.PropertyToID("r_dilated_depth");
+        internal static readonly int SrvInternalUpscaled = Shader.PropertyToID("r_internal_upscaled_color");
+        internal static readonly int SrvLockStatus = Shader.PropertyToID("r_lock_status");
+        internal static readonly int SrvLockInputLuma = Shader.PropertyToID("r_lock_input_luma");
+        internal static readonly int SrvPreparedInputColor = Shader.PropertyToID("r_prepared_input_color");
+        internal static readonly int SrvLumaHistory = Shader.PropertyToID("r_luma_history");
+        internal static readonly int SrvRcasInput = Shader.PropertyToID("r_rcas_input");
+        internal static readonly int SrvLanczosLut = Shader.PropertyToID("r_lanczos_lut");
+        internal static readonly int SrvSceneLuminanceMips = Shader.PropertyToID("r_imgMips");
+        internal static readonly int SrvUpscaleMaximumBiasLut = Shader.PropertyToID("r_upsample_maximum_bias_lut");
+        internal static readonly int SrvDilatedReactiveMasks = Shader.PropertyToID("r_dilated_reactive_masks");
+        internal static readonly int SrvPrevColorPreAlpha = Shader.PropertyToID("r_input_prev_color_pre_alpha");
+        internal static readonly int SrvPrevColorPostAlpha = Shader.PropertyToID("r_input_prev_color_post_alpha");
+
+        // Unordered access views (UAVs), i.e. random read/write bindings. Every name in this group uses the "rw_" prefix; several mirror an "r_" SRV name above.
+        internal static readonly int UavReconstructedPrevNearestDepth = Shader.PropertyToID("rw_reconstructed_previous_nearest_depth");
+        internal static readonly int UavDilatedMotionVectors = Shader.PropertyToID("rw_dilated_motion_vectors");
+        internal static readonly int UavDilatedDepth = Shader.PropertyToID("rw_dilated_depth");
+        internal static readonly int UavInternalUpscaled = Shader.PropertyToID("rw_internal_upscaled_color");
+        internal static readonly int UavLockStatus = Shader.PropertyToID("rw_lock_status");
+        internal static readonly int UavLockInputLuma = Shader.PropertyToID("rw_lock_input_luma");
+        internal static readonly int UavNewLocks = Shader.PropertyToID("rw_new_locks");
+        internal static readonly int UavPreparedInputColor = Shader.PropertyToID("rw_prepared_input_color");
+        internal static readonly int UavLumaHistory = Shader.PropertyToID("rw_luma_history");
+        internal static readonly int UavUpscaledOutput = Shader.PropertyToID("rw_upscaled_output");
+        internal static readonly int UavExposureMipLumaChange = Shader.PropertyToID("rw_img_mip_shading_change");
+        internal static readonly int UavExposureMip5 = Shader.PropertyToID("rw_img_mip_5");
+        internal static readonly int UavDilatedReactiveMasks = Shader.PropertyToID("rw_dilated_reactive_masks");
+        internal static readonly int UavAutoExposure = Shader.PropertyToID("rw_auto_exposure");
+        internal static readonly int UavSpdAtomicCount = Shader.PropertyToID("rw_spd_global_atomic");
+        internal static readonly int UavAutoReactive = Shader.PropertyToID("rw_output_autoreactive");
+        internal static readonly int UavAutoComposition = Shader.PropertyToID("rw_output_autocomposition");
+        internal static readonly int UavPrevColorPreAlpha = Shader.PropertyToID("rw_output_prev_color_pre_alpha");
+        internal static readonly int UavPrevColorPostAlpha = Shader.PropertyToID("rw_output_prev_color_post_alpha");
+
+        // Constant buffer bindings. Every name in this group uses the "cb" prefix.
+        internal static readonly int CbFsr3Upscaler = Shader.PropertyToID("cbFSR3Upscaler");
+        internal static readonly int CbSpd = Shader.PropertyToID("cbSPD");
+        internal static readonly int CbRcas = Shader.PropertyToID("cbRCAS");
+        internal static readonly int CbGenReactive = Shader.PropertyToID("cbGenerateReactive");
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs.meta
new file mode 100644
index 00000000..90126170
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3ShaderIDs.cs.meta
@@ -0,0 +1,3 @@
+﻿fileFormatVersion: 2
+guid: a0e0bcc2967836742b7864d1cafafbf0
+timeCreated: 1679060863
\ No newline at end of file
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs
new file mode 100644
index 00000000..c636c93f
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs
@@ -0,0 +1,333 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+using System.Runtime.InteropServices;
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// A collection of helper functions and data structures required by the FSR3 Upscaler process.
+    /// </summary>
+    public static class Fsr3Upscaler
+    {
+        /// <summary>
+        /// Builds and initializes an FSR3 Upscaler context for the given display size and maximum render size.
+        /// The depth-inverted flag is always overridden to match SystemInfo.usesReversedZBuffer, and debug
+        /// checking is enabled in editor and development builds.
+        /// </summary>
+        public static Fsr3UpscalerContext CreateContext(Vector2Int displaySize, Vector2Int maxRenderSize, Fsr3UpscalerShaders shaders, InitializationFlags flags = 0)
+        {
+            // Whatever the caller passed for depth inversion is replaced by the platform's setting.
+            flags = SystemInfo.usesReversedZBuffer
+                ? flags | InitializationFlags.EnableDepthInverted
+                : flags & ~InitializationFlags.EnableDepthInverted;
+
+#if UNITY_EDITOR || DEVELOPMENT_BUILD
+            flags |= InitializationFlags.EnableDebugChecking;
+#endif
+
+            Debug.Log($"Setting up FSR3 Upscaler with render size: {maxRenderSize.x}x{maxRenderSize.y}, display size: {displaySize.x}x{displaySize.y}, flags: {flags}");
+
+            ContextDescription description;
+            description.Flags = flags;
+            description.DisplaySize = displaySize;
+            description.MaxRenderSize = maxRenderSize;
+            description.Shaders = shaders;
+
+            var upscalerContext = new Fsr3UpscalerContext();
+            upscalerContext.Create(description);
+            return upscalerContext;
+        }
+
+        /// <summary>
+        /// Maps a quality mode to the ratio between display resolution and render resolution along one axis.
+        /// Values that are not a named quality mode map to 1.0, the same as NativeAA.
+        /// </summary>
+        public static float GetUpscaleRatioFromQualityMode(QualityMode qualityMode)
+        {
+            if (qualityMode == QualityMode.UltraQuality)
+                return 1.2f;
+            if (qualityMode == QualityMode.Quality)
+                return 1.5f;
+            if (qualityMode == QualityMode.Balanced)
+                return 1.7f;
+            if (qualityMode == QualityMode.Performance)
+                return 2.0f;
+            if (qualityMode == QualityMode.UltraPerformance)
+                return 3.0f;
+
+            // NativeAA and any unnamed value: no upscaling.
+            return 1.0f;
+        }
+
+        /// <summary>Divides each display dimension by the quality mode's upscale ratio and rounds it with Mathf.RoundToInt.</summary>
+        public static void GetRenderResolutionFromQualityMode(out int renderWidth, out int renderHeight,
+            int displayWidth, int displayHeight, QualityMode qualityMode)
+        {
+            float upscaleRatio = GetUpscaleRatioFromQualityMode(qualityMode);
+            renderHeight = Mathf.RoundToInt(displayHeight / upscaleRatio);
+            renderWidth = Mathf.RoundToInt(displayWidth / upscaleRatio);
+        }
+        
+        /// <summary>
+        /// Computes log2(renderWidth / displayWidth) - 1, using floating-point division of the two widths.
+        /// </summary>
+        public static float GetMipmapBiasOffset(int renderWidth, int displayWidth) => Mathf.Log(renderWidth / (float)displayWidth, 2.0f) - 1.0f;
+
+        /// <summary>Returns the jitter phase count: 8 * (displayWidth / renderWidth)^2, converted to int by a cast.</summary>
+        public static int GetJitterPhaseCount(int renderWidth, int displayWidth)
+        {
+            float widthRatio = displayWidth / (float)renderWidth;
+            return (int)(8.0f * Mathf.Pow(widthRatio, 2.0f));
+        }
+
+        /// <summary>Outputs Halton base 2 (X) and base 3 (Y) samples taken at position (index % phaseCount) + 1, each shifted by -0.5.</summary>
+        public static void GetJitterOffset(out float outX, out float outY, int index, int phaseCount)
+        {
+            int sequenceIndex = (index % phaseCount) + 1;
+            outX = Halton(sequenceIndex, 2) - 0.5f;
+            outY = Halton(sequenceIndex, 3) - 0.5f;
+        }
+
+        // Calculate halton number for index and base; the loop never runs for index <= 0, which yields 0.
+        private static float Halton(int index, int @base)
+        {
+            float f = 1.0f, result = 0.0f;
+            // Integer division keeps the index exact; a float round trip would lose precision above 2^24.
+            for (int remaining = index; remaining > 0; remaining /= @base)
+            {
+                f /= @base;
+                result += f * (remaining % @base);
+            }
+            return result;
+        }
+        
+        /// <summary>
+        /// Evaluates sin(pi*v)/(pi*v) * sin(pi*v/2)/(pi*v/2) for v = value, returning 1.0 when |value| is below Mathf.Epsilon.
+        /// </summary>
+        public static float Lanczos2(float value) => Mathf.Abs(value) < Mathf.Epsilon ? 1.0f : Mathf.Sin(Mathf.PI * value) / (Mathf.PI * value) * (Mathf.Sin(0.5f * Mathf.PI * value) / (0.5f * Mathf.PI * value));
+        
+#if !UNITY_2021_1_OR_NEWER
+        // Only compiled when UNITY_2021_1_OR_NEWER is not defined (see the surrounding #if).
+        // Extension method that forwards directly to CommandBuffer.SetComputeBufferData.
+        internal static void SetBufferData(this CommandBuffer commandBuffer, ComputeBuffer computeBuffer, Array data)
+            => commandBuffer.SetComputeBufferData(computeBuffer, data);
+#endif
+        
+        public enum QualityMode
+        {
+            NativeAA = 0,           // upscale ratio 1.0 (see GetUpscaleRatioFromQualityMode)
+            UltraQuality = 1,       // upscale ratio 1.2
+            Quality = 2,            // upscale ratio 1.5
+            Balanced = 3,           // upscale ratio 1.7
+            Performance = 4,        // upscale ratio 2.0
+            UltraPerformance = 5,   // upscale ratio 3.0
+        }
+
+        [Flags] // each member is a distinct single bit, so values can be combined
+        public enum InitializationFlags
+        {
+            EnableHighDynamicRange = 1 << 0,
+            EnableDisplayResolutionMotionVectors = 1 << 1,
+            EnableMotionVectorsJitterCancellation = 1 << 2,
+            EnableDepthInverted = 1 << 3,   // CreateContext sets or clears this from SystemInfo.usesReversedZBuffer
+            EnableDepthInfinite = 1 << 4,
+            EnableAutoExposure = 1 << 5,
+            EnableDynamicResolution = 1 << 6,
+            EnableFP16Usage = 1 << 7,
+            EnableDebugChecking = 1 << 8,   // CreateContext adds this in editor and development builds
+        }
+
+        /// <summary>
+        /// A structure encapsulating the parameters required to initialize FidelityFX Super Resolution 3 upscaling.
+        /// </summary>
+        public struct ContextDescription
+        {
+            public InitializationFlags Flags;       // bitwise combination of InitializationFlags values
+            public Vector2Int MaxRenderSize;        // presumably the largest render resolution that will be dispatched — confirm in Fsr3UpscalerContext
+            public Vector2Int DisplaySize;          // presumably the resolution of the upscaled output — confirm in Fsr3UpscalerContext
+            public Fsr3UpscalerShaders Shaders;     // compute shader set handed to the context
+        }
+
+        /// <summary>
+        /// A class (reference type) encapsulating the parameters for dispatching the various passes of FidelityFX Super Resolution 3.
+        /// </summary>
+        public class DispatchDescription
+        {
+            public ResourceView Color;
+            public ResourceView Depth;
+            public ResourceView MotionVectors;
+            public ResourceView Exposure;                       // optional
+            public ResourceView Reactive;                       // optional
+            public ResourceView TransparencyAndComposition;     // optional
+            public ResourceView Output;
+            public Vector2 JitterOffset;
+            public Vector2 MotionVectorScale;
+            public Vector2Int RenderSize;
+            public Vector2Int InputResourceSize;
+            public bool EnableSharpening;
+            public float Sharpness;         // NOTE(review): expected range is not visible here — confirm in Fsr3UpscalerContext
+            public float FrameTimeDelta;    // in seconds
+            public float PreExposure;
+            public bool Reset;              // presumably requests that accumulated history is discarded — confirm
+            public float CameraNear;
+            public float CameraFar;
+            public float CameraFovAngleVertical;    // NOTE(review): unit (radians or degrees) is not visible here — confirm
+            public float ViewSpaceToMetersFactor;
+            
+            // EXPERIMENTAL reactive mask generation parameters
+            public bool EnableAutoReactive;
+            public ResourceView ColorOpaqueOnly;
+            public float AutoTcThreshold = 0.05f;
+            public float AutoTcScale = 1.0f;
+            public float AutoReactiveScale = 5.0f;
+            public float AutoReactiveMax = 0.9f;
+        }
+
+        /// <summary>
+        /// A class (reference type) encapsulating the parameters for automatic generation of a reactive mask.
+        /// The default values for Scale, CutoffThreshold, BinaryValue and Flags were taken from the FSR3 demo project.
+        /// </summary>
+        public class GenerateReactiveDescription
+        {
+            public ResourceView ColorOpaqueOnly;
+            public ResourceView ColorPreUpscale;
+            public ResourceView OutReactive;    // presumably the texture the generated mask is written to — confirm
+            public Vector2Int RenderSize;
+            public float Scale = 0.5f;
+            public float CutoffThreshold = 0.2f;
+            public float BinaryValue = 0.9f;
+            public GenerateReactiveFlags Flags = GenerateReactiveFlags.ApplyTonemap | GenerateReactiveFlags.ApplyThreshold | GenerateReactiveFlags.UseComponentsMax;
+        }
+
+        [Flags] // each member is a distinct single bit, so values can be combined
+        public enum GenerateReactiveFlags
+        {
+            ApplyTonemap = 1 << 0,          // part of the GenerateReactiveDescription default
+            ApplyInverseTonemap = 1 << 1,
+            ApplyThreshold = 1 << 2,        // part of the GenerateReactiveDescription default
+            UseComponentsMax = 1 << 3,      // part of the GenerateReactiveDescription default
+        }
+        
+        [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: the declaration order below is the memory order
+        internal struct UpscalerConstants   // presumably mirrors the cbFSR3Upscaler constant buffer on the shader side — confirm
+        {
+            public Vector2Int renderSize;
+            public Vector2Int maxRenderSize;
+            public Vector2Int displaySize;
+            public Vector2Int inputColorResourceDimensions;
+            public Vector2Int lumaMipDimensions;
+            public int lumaMipLevelToUse;
+            public int frameIndex;
+            
+            public Vector4 deviceToViewDepth;
+            public Vector2 jitterOffset;
+            public Vector2 motionVectorScale;
+            public Vector2 downscaleFactor;
+            public Vector2 motionVectorJitterCancellation;
+            public float preExposure;
+            public float previousFramePreExposure;
+            public float tanHalfFOV;
+            public float jitterPhaseCount;
+            public float deltaTime;
+            public float dynamicResChangeFactor;
+            public float viewSpaceToMetersFactor;
+
+            public int dummy;   // presumably padding — confirm against the shader-side declaration
+        }
+        
+        [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: the declaration order below is the memory order
+        internal struct SpdConstants    // presumably mirrors the cbSPD constant buffer on the shader side — confirm
+        {
+            public uint mips;
+            public uint numWorkGroups;
+            public uint workGroupOffsetX, workGroupOffsetY;
+            public uint renderSizeX, renderSizeY;
+        }
+
+        [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: the declaration order below is the memory order
+        internal struct GenerateReactiveConstants   // presumably mirrors the cbGenerateReactive constant buffer — confirm
+        {
+            public float scale;
+            public float threshold;
+            public float binaryValue;
+            public uint flags;  // presumably a GenerateReactiveFlags value converted to uint — confirm at the call site
+        }
+
+        [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: the declaration order below is the memory order
+        internal struct GenerateReactiveConstants2  // field names match the Auto* parameters of DispatchDescription
+        {
+            public float autoTcThreshold;
+            public float autoTcScale;
+            public float autoReactiveScale;
+            public float autoReactiveMax;
+        }
+        
+        [Serializable, StructLayout(LayoutKind.Sequential)] // sequential layout: the declaration order below is the memory order
+        internal struct RcasConstants   // four uint fields, 16 bytes in total; presumably mirrors the cbRCAS constant buffer — confirm
+        {
+            public RcasConstants(uint sharpness, uint halfSharp)
+            {
+                this.sharpness = sharpness;
+                this.halfSharp = halfSharp;
+                dummy0 = dummy1 = 0;    // the two dummy fields are always zero
+            }
+        
+            public readonly uint sharpness;
+            public readonly uint halfSharp;
+            public readonly uint dummy0;    // presumably padding — confirm against the shader-side declaration
+            public readonly uint dummy1;    // presumably padding — confirm against the shader-side declaration
+        }
+    }
+    
+    /// <summary>
+    /// An immutable value describing what to bind to a compute shader: a render target identifier together with the sub-element and mip level to use.
+    /// </summary>
+    public readonly struct ResourceView
+    {
+        public readonly RenderTargetIdentifier RenderTarget;
+        public readonly RenderTextureSubElement SubElement;
+        public readonly int MipLevel;
+
+        /// <summary>
+        /// The equivalent of not setting any value: it wraps a default render target identifier, so IsValid is false.
+        /// Any variable that may hold this value should be checked with IsValid and reassigned before being bound to a shader.
+        /// </summary>
+        public static readonly ResourceView Unassigned = new ResourceView(default);
+
+        /// <summary>
+        /// A valid texture reference that can be bound to a shader, but only an empty placeholder; binding it can be seen as setting the shader's texture variable to null.
+        /// </summary>
+        public static readonly ResourceView None = new ResourceView(BuiltinRenderTextureType.None);
+
+        public ResourceView(in RenderTargetIdentifier renderTarget, RenderTextureSubElement subElement = RenderTextureSubElement.Default, int mipLevel = 0)
+        {
+            MipLevel = mipLevel;
+            SubElement = subElement;
+            RenderTarget = renderTarget;
+        }
+
+        /// <summary>False only when RenderTarget equals the default RenderTargetIdentifier.</summary>
+        public bool IsValid { get { return !RenderTarget.Equals(default); } }
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs.meta
new file mode 100644
index 00000000..6717df42
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3Upscaler.cs.meta
@@ -0,0 +1,3 @@
+﻿fileFormatVersion: 2
+guid: df8b18c192f2dc145b4b43e68fd3407d
+timeCreated: 1673441954
\ No newline at end of file
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs
new file mode 100644
index 00000000..3e4e24f2
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs
@@ -0,0 +1,151 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using UnityEngine;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// ScriptableObject that holds references to all compute shaders used by the FidelityFX Super Resolution 3 (FSR3) Upscaler.
+    /// Storing them in an asset lets a scene or prefab reference the shaders directly instead of loading them from a Resources folder.
+    /// </summary>
+    public class Fsr3UpscalerAssets : ScriptableObject
+    {
+        public Fsr3UpscalerShaders shaders;
+
+#if UNITY_EDITOR
+        // Editor only: rebuilds the whole shader set by searching the asset database for each pass by name.
+        private void Reset()
+        {
+            var located = new Fsr3UpscalerShaders();
+            located.computeLuminancePyramidPass = FindComputeShader("ffx_fsr3upscaler_compute_luminance_pyramid_pass");
+            located.reconstructPreviousDepthPass = FindComputeShader("ffx_fsr3upscaler_reconstruct_previous_depth_pass");
+            located.depthClipPass = FindComputeShader("ffx_fsr3upscaler_depth_clip_pass");
+            located.lockPass = FindComputeShader("ffx_fsr3upscaler_lock_pass");
+            located.accumulatePass = FindComputeShader("ffx_fsr3upscaler_accumulate_pass");
+            located.sharpenPass = FindComputeShader("ffx_fsr3upscaler_rcas_pass");
+            located.autoGenReactivePass = FindComputeShader("ffx_fsr3upscaler_autogen_reactive_pass");
+            located.tcrAutoGenPass = FindComputeShader("ffx_fsr3upscaler_tcr_autogen_pass");
+            shaders = located;
+        }
+
+        // Loads the first compute shader asset the search returns for the given name, or null when there is no hit.
+        private static ComputeShader FindComputeShader(string name)
+        {
+            string[] hits = UnityEditor.AssetDatabase.FindAssets($"t:ComputeShader {name}");
+            bool hasHit = hits != null && hits.Length > 0;
+            if (!hasHit)
+                return null;
+            return UnityEditor.AssetDatabase.LoadAssetAtPath<ComputeShader>(UnityEditor.AssetDatabase.GUIDToAssetPath(hits[0]));
+        }
+#endif
+    }
+    
+    /// <summary>
+    /// All the compute shaders used by the FSR3 Upscaler.
+    /// </summary>
+    [System.Serializable]
+    public class Fsr3UpscalerShaders
+    {
+        /// <summary>
+        /// The compute shader used by the luminance pyramid computation pass.
+        /// </summary>
+        public ComputeShader computeLuminancePyramidPass;
+
+        /// <summary>
+        /// The compute shader used by the previous depth reconstruction pass.
+        /// </summary>
+        public ComputeShader reconstructPreviousDepthPass;
+
+        /// <summary>
+        /// The compute shader used by the depth clip pass.
+        /// </summary>
+        public ComputeShader depthClipPass;
+
+        /// <summary>
+        /// The compute shader used by the lock pass.
+        /// </summary>
+        public ComputeShader lockPass;
+
+        /// <summary>
+        /// The compute shader used by the accumulation pass.
+        /// </summary>
+        public ComputeShader accumulatePass;
+
+        /// <summary>
+        /// The compute shader used by the RCAS sharpening pass.
+        /// </summary>
+        public ComputeShader sharpenPass;
+
+        /// <summary>
+        /// The compute shader used to auto-generate a reactive mask.
+        /// </summary>
+        public ComputeShader autoGenReactivePass;
+
+        /// <summary>
+        /// The compute shader used to auto-generate a transparency and composition mask.
+        /// </summary>
+        public ComputeShader tcrAutoGenPass;
+        
+        /// <summary>
+        /// Returns a shallow copy: a new object whose fields reference the same ComputeShader objects as this one.
+        /// </summary>
+        public Fsr3UpscalerShaders Clone()
+        {
+            return MemberwiseClone() as Fsr3UpscalerShaders;
+        }
+
+        /// <summary>
+        /// Returns a copy of this class with clones of all its assigned shaders, which can be useful if you're running multiple FSR3 Upscaler instances with different shader configurations.
+        /// Unassigned (null) shaders stay null in the copy, because Object.Instantiate throws when it is given null. Clean up the clones through Dispose once you're done with them.
+        /// </summary>
+        public Fsr3UpscalerShaders DeepCopy()
+        {
+            ComputeShader CloneOrNull(ComputeShader shader) => shader != null ? Object.Instantiate(shader) : null;
+            return new Fsr3UpscalerShaders
+            {
+                computeLuminancePyramidPass = CloneOrNull(computeLuminancePyramidPass),
+                reconstructPreviousDepthPass = CloneOrNull(reconstructPreviousDepthPass),
+                depthClipPass = CloneOrNull(depthClipPass),
+                lockPass = CloneOrNull(lockPass),
+                accumulatePass = CloneOrNull(accumulatePass),
+                sharpenPass = CloneOrNull(sharpenPass),
+                autoGenReactivePass = CloneOrNull(autoGenReactivePass),
+                tcrAutoGenPass = CloneOrNull(tcrAutoGenPass),
+            };
+        }
+
+        /// <summary>
+        /// Destroy all the shaders within this instance; use this only on clones created through DeepCopy.
+        /// Outside play mode DestroyImmediate is used, because Object.Destroy is not allowed there. Unassigned shaders are skipped.
+        /// </summary>
+        public void Dispose()
+        {
+            var owned = new[] { computeLuminancePyramidPass, reconstructPreviousDepthPass, depthClipPass, lockPass, accumulatePass, sharpenPass, autoGenReactivePass, tcrAutoGenPass };
+            foreach (ComputeShader shader in owned)
+            {
+                if (shader != null && Application.isPlaying)
+                    Object.Destroy(shader);
+                else if (shader != null)
+                    Object.DestroyImmediate(shader);
+            }
+        }
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs.meta
new file mode 100644
index 00000000..de480321
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerAssets.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: aaeb3d821f826d44b84289a2dd23f90e
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs
new file mode 100644
index 00000000..5b2c89cc
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs
@@ -0,0 +1,81 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using UnityEngine;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// A collection of callbacks required by the FSR3 Upscaler process.
+    /// This allows some customization by the game dev on how to integrate FSR3 upscaling into their own game setup.
+    /// </summary>
+    public interface IFsr3UpscalerCallbacks
+    {
+        /// <summary>
+        /// Apply a mipmap bias to in-game textures to prevent them from becoming blurry as the internal rendering resolution lowers.
+        /// This will need to be customized on a per-game basis, as there is no clear universal way to determine what are "in-game" textures.
+        /// The default implementation will simply apply a mipmap bias to all 2D textures, which will include things like UI textures and which might miss things like terrain texture arrays.
+        /// 
+        /// Depending on how your game organizes its assets, you will want to create a filter that more specifically selects the textures that need to have this mipmap bias applied. You may also want to store the bias offset value and apply it to any assets that are loaded in on demand.
+        /// </summary>
+        void ApplyMipmapBias(float biasOffset);
+
+        /// <summary>Undo the mipmap bias applied through ApplyMipmapBias. The default implementation applies the negated sum of all offsets applied so far.</summary>
+        void UndoMipmapBias();
+    }
+    
+    /// <summary>
+    /// Default implementation of IFsr3UpscalerCallbacks using simple Resources calls.
+    /// These are fine for testing but a proper game will want to extend and override these methods.
+    /// </summary>
+    public class Fsr3UpscalerCallbacksBase: IFsr3UpscalerCallbacks
+    {
+        // Running total of every bias offset applied so far; snapped to exactly zero when it is approximately zero.
+        protected float CurrentBiasOffset = 0;
+
+        /// <summary>Adds biasOffset to the mipmap bias of every Texture2D returned by Resources.FindObjectsOfTypeAll that has more than one mip level.</summary>
+        public virtual void ApplyMipmapBias(float biasOffset)
+        {
+            // NaN and infinite offsets are ignored entirely.
+            bool isFinite = !float.IsNaN(biasOffset) && !float.IsInfinity(biasOffset);
+            if (!isFinite)
+                return;
+
+            float accumulated = CurrentBiasOffset + biasOffset;
+            CurrentBiasOffset = Mathf.Approximately(accumulated, 0f) ? 0f : accumulated;
+
+            Texture2D[] foundTextures = Resources.FindObjectsOfTypeAll<Texture2D>();
+            for (int i = 0; i < foundTextures.Length; ++i)
+            {
+                // Textures with a single mip level are left alone.
+                if (foundTextures[i].mipmapCount > 1)
+                    foundTextures[i].mipMapBias += biasOffset;
+            }
+        }
+
+        /// <summary>Applies the negated running total through ApplyMipmapBias; does nothing when the total is zero.</summary>
+        public virtual void UndoMipmapBias()
+        {
+            // Exact comparison works here because ApplyMipmapBias snaps near-zero totals to 0.
+            if (CurrentBiasOffset != 0f)
+                ApplyMipmapBias(-CurrentBiasOffset);
+        }
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs.meta
new file mode 100644
index 00000000..ae2ee1a3
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerCallbacks.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: e1b555daa29ec3043a8cf89b4db31a26
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs
new file mode 100644
index 00000000..da02d9fa
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs
@@ -0,0 +1,610 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+using System.Runtime.InteropServices;
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// This class loosely matches the FfxFsr3UpscalerContext struct from the original FSR3 codebase.
+    /// It manages the various resources and compute passes required by the FSR3 Upscaler process.
+    /// Note that this class does not know anything about Unity render pipelines; all it knows is CommandBuffers and RenderTargetIdentifiers.
+    /// This should make it suitable for integration with any of the available Unity render pipelines.
+    /// </summary>
+    public class Fsr3UpscalerContext
+    {
+        private const int MaxQueuedFrames = 16; // Modulus applied to _resourceFrameIndex after every Dispatch
+        
+        private Fsr3Upscaler.ContextDescription _contextDescription;
+        private CommandBuffer _commandBuffer; // Internal buffer used by the overloads that execute immediately
+        
+        private Fsr3UpscalerPass _depthClipPass;
+        private Fsr3UpscalerPass _reconstructPreviousDepthPass;
+        private Fsr3UpscalerPass _lockPass;
+        private Fsr3UpscalerPass _accumulatePass;
+        private Fsr3UpscalerPass _sharpenPass;
+        private Fsr3UpscalerPass _computeLuminancePyramidPass;
+        private Fsr3UpscalerPass _generateReactivePass;
+        private Fsr3UpscalerPass _tcrAutogeneratePass;
+
+        private readonly Fsr3UpscalerResources _resources = new Fsr3UpscalerResources();
+
+        // Each constants struct lives in a one-element array (the CPU-side copy passed to SetBufferData),
+        private ComputeBuffer _upscalerConstantsBuffer;
+        private readonly Fsr3Upscaler.UpscalerConstants[] _upscalerConstantsArray = { new Fsr3Upscaler.UpscalerConstants() };
+        private ref Fsr3Upscaler.UpscalerConstants UpscalerConsts => ref _upscalerConstantsArray[0]; // ref accessor: writes go straight into the array element
+
+        private ComputeBuffer _spdConstantsBuffer;
+        private readonly Fsr3Upscaler.SpdConstants[] _spdConstantsArray = { new Fsr3Upscaler.SpdConstants() };
+        private ref Fsr3Upscaler.SpdConstants SpdConsts => ref _spdConstantsArray[0];
+    
+        private ComputeBuffer _rcasConstantsBuffer;
+        private readonly Fsr3Upscaler.RcasConstants[] _rcasConstantsArray = new Fsr3Upscaler.RcasConstants[1];
+        private ref Fsr3Upscaler.RcasConstants RcasConsts => ref _rcasConstantsArray[0];
+
+        private ComputeBuffer _generateReactiveConstantsBuffer;
+        private readonly Fsr3Upscaler.GenerateReactiveConstants[] _generateReactiveConstantsArray = { new Fsr3Upscaler.GenerateReactiveConstants() };
+        private ref Fsr3Upscaler.GenerateReactiveConstants GenReactiveConsts => ref _generateReactiveConstantsArray[0];
+
+        private ComputeBuffer _tcrAutogenerateConstantsBuffer;
+        private readonly Fsr3Upscaler.GenerateReactiveConstants2[] _tcrAutogenerateConstantsArray = { new Fsr3Upscaler.GenerateReactiveConstants2() };
+        private ref Fsr3Upscaler.GenerateReactiveConstants2 TcrAutoGenConsts => ref _tcrAutogenerateConstantsArray[0];
+
+        private bool _firstExecution; // True until the first Dispatch, which it forces to reset accumulation
+        private Vector2 _previousJitterOffset; // Jitter offset of the preceding Dispatch, used for motion vector jitter cancellation
+        private int _resourceFrameIndex; // Its value modulo 2 is the frameIndex handed to the passes
+
+        /// <summary>Stores the description and allocates the command buffer, constant buffers, resources and passes.</summary>
+        public void Create(Fsr3Upscaler.ContextDescription contextDescription)
+        {
+            _contextDescription = contextDescription;
+            // Start from a clean history state
+            _resourceFrameIndex = 0;
+            _firstExecution = true;
+            UpscalerConsts.displaySize = _contextDescription.DisplaySize;
+
+            _commandBuffer = new CommandBuffer { name = "FSR3 Upscaler" };
+            // One single-element GPU buffer per constants struct
+            _upscalerConstantsBuffer = CreateConstantBuffer<Fsr3Upscaler.UpscalerConstants>();
+            _spdConstantsBuffer = CreateConstantBuffer<Fsr3Upscaler.SpdConstants>();
+            _rcasConstantsBuffer = CreateConstantBuffer<Fsr3Upscaler.RcasConstants>();
+            _generateReactiveConstantsBuffer = CreateConstantBuffer<Fsr3Upscaler.GenerateReactiveConstants>();
+            _tcrAutogenerateConstantsBuffer = CreateConstantBuffer<Fsr3Upscaler.GenerateReactiveConstants2>();
+
+            _resources.Create(_contextDescription);
+            CreatePasses();
+        }
+
+        private void CreatePasses()
+        {   // Reactive-mask helper passes first, then the main passes in the order Dispatch schedules them
+            _generateReactivePass = new Fsr3UpscalerGenerateReactivePass(_contextDescription, _resources, _generateReactiveConstantsBuffer);
+            _tcrAutogeneratePass = new Fsr3UpscalerTcrAutogeneratePass(_contextDescription, _resources, _upscalerConstantsBuffer, _tcrAutogenerateConstantsBuffer);
+            _computeLuminancePyramidPass = new Fsr3UpscalerComputeLuminancePyramidPass(_contextDescription, _resources, _upscalerConstantsBuffer, _spdConstantsBuffer);
+            _reconstructPreviousDepthPass = new Fsr3UpscalerReconstructPreviousDepthPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _depthClipPass = new Fsr3UpscalerDepthClipPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _lockPass = new Fsr3UpscalerLockPass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _accumulatePass = new Fsr3UpscalerAccumulatePass(_contextDescription, _resources, _upscalerConstantsBuffer);
+            _sharpenPass = new Fsr3UpscalerSharpenPass(_contextDescription, _resources, _upscalerConstantsBuffer, _rcasConstantsBuffer);
+        }
+        
+        /// <summary>Disposes the passes, resources, constant buffers and command buffer that Create allocated.</summary>
+        public void Destroy()
+        {
+            DestroyPass(ref _tcrAutogeneratePass);
+            DestroyPass(ref _generateReactivePass);
+            DestroyPass(ref _computeLuminancePyramidPass);
+            DestroyPass(ref _sharpenPass);
+            DestroyPass(ref _accumulatePass);
+            DestroyPass(ref _lockPass);
+            DestroyPass(ref _reconstructPreviousDepthPass);
+            DestroyPass(ref _depthClipPass);
+            _resources.Destroy();
+
+            DestroyConstantBuffer(ref _tcrAutogenerateConstantsBuffer);
+            DestroyConstantBuffer(ref _generateReactiveConstantsBuffer);
+            DestroyConstantBuffer(ref _rcasConstantsBuffer);
+            DestroyConstantBuffer(ref _spdConstantsBuffer);
+            DestroyConstantBuffer(ref _upscalerConstantsBuffer);
+
+            _commandBuffer?.Dispose(); // null-safe like DestroyPass/DestroyConstantBuffer: no NullReferenceException if Create never ran or Destroy runs twice
+            _commandBuffer = null;
+        }
+
+        public void Dispatch(Fsr3Upscaler.DispatchDescription dispatchParams)
+        {   // Convenience overload: records into the context's own command buffer and executes it immediately
+            _commandBuffer.Clear();
+            Dispatch(dispatchParams, _commandBuffer);
+            Graphics.ExecuteCommandBuffer(_commandBuffer);
+        }
+        
+        /// <summary>Records one full upscale (optional auto-reactive generation, the main passes and optional RCAS sharpening) into commandBuffer.</summary>
+        public void Dispatch(Fsr3Upscaler.DispatchDescription dispatchParams, CommandBuffer commandBuffer)
+        {
+            if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDebugChecking) != 0)
+            {
+                DebugCheckDispatch(dispatchParams);
+            }
+            
+            if (_firstExecution)
+            {
+                commandBuffer.SetRenderTarget(_resources.LockStatus[0]);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+                commandBuffer.SetRenderTarget(_resources.LockStatus[1]);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+            }
+            
+            int frameIndex = _resourceFrameIndex % 2;
+            bool resetAccumulation = dispatchParams.Reset || _firstExecution;
+            _firstExecution = false;
+
+            // If auto exposure is enabled use the auto exposure SRV, otherwise what the app sends
+            if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableAutoExposure) != 0)
+                dispatchParams.Exposure = new ResourceView(_resources.AutoExposure);
+            else if (!dispatchParams.Exposure.IsValid) 
+                dispatchParams.Exposure = new ResourceView(_resources.DefaultExposure);
+
+            if (dispatchParams.EnableAutoReactive)
+            {
+                // Create the auto-TCR resources only when we need them
+                if (_resources.AutoReactive == null)
+                    _resources.CreateTcrAutogenResources(_contextDescription);
+
+                if (resetAccumulation)
+                {
+                    RenderTargetIdentifier opaqueOnly = dispatchParams.ColorOpaqueOnly.IsValid ? dispatchParams.ColorOpaqueOnly.RenderTarget : Fsr3ShaderIDs.SrvOpaqueOnly;
+                    commandBuffer.Blit(_resources.PrevPreAlpha[frameIndex ^ 1], opaqueOnly); // NOTE(review): Blit(source, dest) copies PrevPreAlpha INTO opaqueOnly - confirm this direction is intended
+                }
+            }
+            else if (_resources.AutoReactive != null)
+            {
+                // Destroy the auto-TCR resources if we don't use the feature 
+                _resources.DestroyTcrAutogenResources();
+            }
+            
+            if (!dispatchParams.Reactive.IsValid) dispatchParams.Reactive = new ResourceView(_resources.DefaultReactive);
+            if (!dispatchParams.TransparencyAndComposition.IsValid) dispatchParams.TransparencyAndComposition = new ResourceView(_resources.DefaultReactive);
+            Fsr3UpscalerResources.CreateAliasableResources(commandBuffer, _contextDescription, dispatchParams);
+            SetupConstants(dispatchParams, resetAccumulation);
+            
+            // Thread group counts (rounded up) for the render-resolution (Src) and display-resolution (Dst) passes
+            const int threadGroupWorkRegionDim = 8;
+            int dispatchSrcX = (UpscalerConsts.renderSize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchSrcY = (UpscalerConsts.renderSize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchDstX = (_contextDescription.DisplaySize.x + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+            int dispatchDstY = (_contextDescription.DisplaySize.y + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+            // On reset, clear the resources that carry state over from previous frames
+            if (resetAccumulation)
+            {
+                commandBuffer.SetRenderTarget(_resources.LockStatus[frameIndex ^ 1]);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+                
+                commandBuffer.SetRenderTarget(_resources.InternalUpscaled[frameIndex ^ 1]);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+                
+                commandBuffer.SetRenderTarget(_resources.SceneLuminance);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+                
+                // Auto exposure always used to track luma changes in locking logic
+                commandBuffer.SetRenderTarget(_resources.AutoExposure);
+                commandBuffer.ClearRenderTarget(false, true, new Color(0f, 1e8f, 0f, 0f));
+
+                // Reset atomic counter to 0
+                commandBuffer.SetRenderTarget(_resources.SpdAtomicCounter);
+                commandBuffer.ClearRenderTarget(false, true, Color.clear);
+            }
+            
+            // FSR3: need to clear here since we need the content of this surface for frame interpolation, so clearing in the lock pass is not an option
+            bool depthInverted = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) == Fsr3Upscaler.InitializationFlags.EnableDepthInverted;
+            commandBuffer.SetRenderTarget(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth);
+            commandBuffer.ClearRenderTarget(false, true, depthInverted ? Color.clear : Color.white);
+            
+            // Auto exposure
+            SetupSpdConstants(dispatchParams, out var dispatchThreadGroupCount);
+            
+            // Initialize constant buffers data
+            commandBuffer.SetBufferData(_upscalerConstantsBuffer, _upscalerConstantsArray);
+            commandBuffer.SetBufferData(_spdConstantsBuffer, _spdConstantsArray);
+
+            // Auto reactive
+            if (dispatchParams.EnableAutoReactive)
+            {
+                GenerateTransparencyCompositionReactive(dispatchParams, commandBuffer, frameIndex);
+                dispatchParams.Reactive = new ResourceView(_resources.AutoReactive);
+                dispatchParams.TransparencyAndComposition = new ResourceView(_resources.AutoComposition);
+            }
+            
+            // Compute luminance pyramid
+            _computeLuminancePyramidPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchThreadGroupCount.x, dispatchThreadGroupCount.y);
+
+            // Reconstruct previous depth
+            _reconstructPreviousDepthPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Depth clip
+            _depthClipPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Create locks
+            _lockPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchSrcX, dispatchSrcY);
+
+            // Accumulate
+            _accumulatePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, dispatchDstX, dispatchDstY);
+
+            if (dispatchParams.EnableSharpening)
+            {
+                // Compute the constants
+                SetupRcasConstants(dispatchParams);
+                commandBuffer.SetBufferData(_rcasConstantsBuffer, _rcasConstantsArray);
+                
+                // Dispatch RCAS over the context's display size; the output target is not necessarily the size of the screen
+                const int threadGroupWorkRegionDimRcas = 16;
+                int threadGroupsX = (_contextDescription.DisplaySize.x + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas;
+                int threadGroupsY = (_contextDescription.DisplaySize.y + threadGroupWorkRegionDimRcas - 1) / threadGroupWorkRegionDimRcas;
+                _sharpenPass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, threadGroupsX, threadGroupsY);
+            }
+
+            _resourceFrameIndex = (_resourceFrameIndex + 1) % MaxQueuedFrames;
+
+            Fsr3UpscalerResources.DestroyAliasableResources(commandBuffer);
+        }
+
+        public void GenerateReactiveMask(Fsr3Upscaler.GenerateReactiveDescription dispatchParams)
+        {   // Convenience overload: records into the context's own command buffer and executes it immediately
+            _commandBuffer.Clear();
+            GenerateReactiveMask(dispatchParams, _commandBuffer);
+            Graphics.ExecuteCommandBuffer(_commandBuffer);
+        }
+
+        /// <summary>Uploads the reactive-mask constants and schedules the generate-reactive pass on commandBuffer.</summary>
+        public void GenerateReactiveMask(Fsr3Upscaler.GenerateReactiveDescription dispatchParams, CommandBuffer commandBuffer)
+        {
+            ref Fsr3Upscaler.GenerateReactiveConstants reactiveConsts = ref GenReactiveConsts;
+            reactiveConsts.scale = dispatchParams.Scale;
+            reactiveConsts.threshold = dispatchParams.CutoffThreshold;
+            reactiveConsts.binaryValue = dispatchParams.BinaryValue;
+            reactiveConsts.flags = (uint)dispatchParams.Flags;
+            commandBuffer.SetBufferData(_generateReactiveConstantsBuffer, _generateReactiveConstantsArray);
+
+            const int workRegionDim = 8; // work region dimension per thread group; group counts are rounded up
+            var reactivePass = (Fsr3UpscalerGenerateReactivePass)_generateReactivePass;
+            reactivePass.ScheduleDispatch(commandBuffer, dispatchParams, (dispatchParams.RenderSize.x + workRegionDim - 1) / workRegionDim, (dispatchParams.RenderSize.y + workRegionDim - 1) / workRegionDim);
+        }
+
+        /// <summary>Uploads the auto-TCR constants and schedules the TCR autogenerate pass at render resolution.</summary>
+        private void GenerateTransparencyCompositionReactive(Fsr3Upscaler.DispatchDescription dispatchParams, CommandBuffer commandBuffer, int frameIndex)
+        {
+            ref Fsr3Upscaler.GenerateReactiveConstants2 tcrConsts = ref TcrAutoGenConsts;
+            tcrConsts.autoTcThreshold = dispatchParams.AutoTcThreshold;
+            tcrConsts.autoTcScale = dispatchParams.AutoTcScale;
+            tcrConsts.autoReactiveScale = dispatchParams.AutoReactiveScale;
+            tcrConsts.autoReactiveMax = dispatchParams.AutoReactiveMax;
+            commandBuffer.SetBufferData(_tcrAutogenerateConstantsBuffer, _tcrAutogenerateConstantsArray);
+
+            const int workRegionDim = 8; // work region dimension per thread group; group counts are rounded up
+            int groupsX = (dispatchParams.RenderSize.x + workRegionDim - 1) / workRegionDim;
+            _tcrAutogeneratePass.ScheduleDispatch(commandBuffer, dispatchParams, frameIndex, groupsX, (dispatchParams.RenderSize.y + workRegionDim - 1) / workRegionDim);
+        }
+
+        /// <summary>Fills the CPU-side upscaler constants for this dispatch; some fields (pre-exposure, jitter, phase count, frame index) carry state from the previous call.</summary>
+        private void SetupConstants(Fsr3Upscaler.DispatchDescription dispatchParams, bool resetAccumulation)
+        {
+            ref Fsr3Upscaler.UpscalerConstants constants = ref UpscalerConsts;
+            constants.jitterOffset = dispatchParams.JitterOffset;
+            constants.renderSize = dispatchParams.RenderSize;
+            constants.maxRenderSize = _contextDescription.MaxRenderSize;
+            constants.inputColorResourceDimensions = dispatchParams.InputResourceSize;
+
+            // Compute the horizontal FOV for the shader from the vertical one
+            float aspectRatio = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y;
+            float cameraAngleHorizontal = Mathf.Atan(Mathf.Tan(dispatchParams.CameraFovAngleVertical / 2.0f) * aspectRatio) * 2.0f;
+            constants.tanHalfFOV = Mathf.Tan(cameraAngleHorizontal * 0.5f);
+            constants.viewSpaceToMetersFactor = (dispatchParams.ViewSpaceToMetersFactor > 0.0f) ? dispatchParams.ViewSpaceToMetersFactor : 1.0f; // non-positive factors fall back to 1
+
+            // Compute params to enable device depth to view space depth computation in shader
+            constants.deviceToViewDepth = SetupDeviceDepthToViewSpaceDepthParams(dispatchParams);
+            
+            // To be updated if resource is larger than the actual image size
+            constants.downscaleFactor = new Vector2((float)constants.renderSize.x / _contextDescription.DisplaySize.x, (float)constants.renderSize.y / _contextDescription.DisplaySize.y);
+            constants.previousFramePreExposure = constants.preExposure; // must be captured before preExposure is overwritten on the next line
+            constants.preExposure = (dispatchParams.PreExposure != 0) ? dispatchParams.PreExposure : 1.0f; // a pre-exposure of 0 falls back to 1
+            
+            // Motion vector data
+            Vector2Int motionVectorsTargetSize = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0 ? constants.displaySize : constants.renderSize;
+            constants.motionVectorScale = dispatchParams.MotionVectorScale / motionVectorsTargetSize;
+            
+            // Compute jitter cancellation
+            if ((_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0)
+            {
+                constants.motionVectorJitterCancellation = (_previousJitterOffset - constants.jitterOffset) / motionVectorsTargetSize;
+                _previousJitterOffset = constants.jitterOffset; // remembered for the next dispatch
+            }
+
+            int jitterPhaseCount = Fsr3Upscaler.GetJitterPhaseCount(dispatchParams.RenderSize.x, _contextDescription.DisplaySize.x);
+            if (resetAccumulation || constants.jitterPhaseCount == 0)
+            {
+                constants.jitterPhaseCount = jitterPhaseCount;
+            }
+            else
+            {
+                // Move the stored phase count at most one step per dispatch toward the newly computed value
+                int jitterPhaseCountDelta = (int)(jitterPhaseCount - constants.jitterPhaseCount);
+                if (jitterPhaseCountDelta > 0)
+                    constants.jitterPhaseCount++;
+                else if (jitterPhaseCountDelta < 0)
+                    constants.jitterPhaseCount--;
+            }
+            // FrameTimeDelta is passed through without unit conversion (DebugCheckDispatch expects seconds); clamp to [0, 1]
+            constants.deltaTime = Mathf.Clamp01(dispatchParams.FrameTimeDelta);
+
+            if (resetAccumulation)
+                constants.frameIndex = 0;
+            else
+                constants.frameIndex++;
+
+            // Shading change usage of the SPD mip levels
+            constants.lumaMipLevelToUse = Fsr3UpscalerPass.ShadingChangeMipLevel;
+
+            float mipDiv = 2 << constants.lumaMipLevelToUse; // equals 2^(lumaMipLevelToUse + 1)
+            constants.lumaMipDimensions.x = (int)(constants.maxRenderSize.x / mipDiv);
+            constants.lumaMipDimensions.y = (int)(constants.maxRenderSize.y / mipDiv);
+        }
+        
+        /// <summary>Returns (d * c, e, aspect / cotHalfFovY, 1 / cotHalfFovY), where c and e are picked by the inverted/infinite depth flags.</summary>
+        private Vector4 SetupDeviceDepthToViewSpaceDepthParams(Fsr3Upscaler.DispatchDescription dispatchParams)
+        {
+            // Machine epsilon (C's FLT_EPSILON). Mathf.Epsilon is the smallest positive float, so it cannot bias -1 or -min and leaves the 0 case without a usable guard.
+            const float floatEpsilon = 1.1920929e-7f;
+            bool inverted = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0;
+            bool infinite = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInfinite) != 0;
+
+            // make sure it has no impact if near and far plane values are swapped in dispatch params
+            // the flags "inverted" and "infinite" will decide what transform to use
+            float min = Mathf.Min(dispatchParams.CameraNear, dispatchParams.CameraFar);
+            float max = Mathf.Max(dispatchParams.CameraNear, dispatchParams.CameraFar);
+            if (inverted)
+                (min, max) = (max, min);
+
+            float q = max / (min - max);
+            float d = -1.0f;
+            // Candidates in matrixIndex order: [regular, infinite, inverted, inverted + infinite]
+            Vector4 matrixElemC = new Vector4(q, -1.0f - floatEpsilon, q, 0.0f + floatEpsilon);
+            Vector4 matrixElemE = new Vector4(q * min, -min - floatEpsilon, q * min, max);
+            
+            // Revert x and y coords
+            float aspect = (float)dispatchParams.RenderSize.x / dispatchParams.RenderSize.y;
+            float cotHalfFovY = Mathf.Cos(0.5f * dispatchParams.CameraFovAngleVertical) / Mathf.Sin(0.5f * dispatchParams.CameraFovAngleVertical);
+
+            int matrixIndex = (inverted ? 2 : 0) + (infinite ? 1 : 0);
+            return new Vector4(
+                d * matrixElemC[matrixIndex],
+                matrixElemE[matrixIndex],
+                aspect / cotHalfFovY,
+                1.0f / cotHalfFovY);
+        }
+
+        private void SetupRcasConstants(Fsr3Upscaler.DispatchDescription dispatchParams)
+        {   // Map the clamped [0, 1] sharpness onto the nearest entry of the precomputed RcasConfigs table
+            float tablePosition = Mathf.Clamp01(dispatchParams.Sharpness) * (RcasConfigs.Length - 1);
+            RcasConsts = RcasConfigs[Mathf.RoundToInt(tablePosition)];
+        }
+
+        private void SetupSpdConstants(Fsr3Upscaler.DispatchDescription dispatchParams, out Vector2Int dispatchThreadGroupCount)
+        {
+            // A rectangle at the origin spanning the render size determines the SPD work group layout
+            var renderSize = dispatchParams.RenderSize;
+            SpdSetup(new RectInt(0, 0, renderSize.x, renderSize.y), out dispatchThreadGroupCount, out Vector2Int groupOffset, out Vector2Int groupsAndMips);
+
+            ref Fsr3Upscaler.SpdConstants spd = ref SpdConsts;
+            spd.numWorkGroups = (uint)groupsAndMips.x;
+            spd.mips = (uint)groupsAndMips.y;
+            spd.workGroupOffsetX = (uint)groupOffset.x;
+            spd.workGroupOffsetY = (uint)groupOffset.y;
+            spd.renderSizeX = (uint)renderSize.x;
+            spd.renderSizeY = (uint)renderSize.y;
+        }
+
+        private static void SpdSetup(RectInt rectInfo, out Vector2Int dispatchThreadGroupCount, out Vector2Int workGroupOffset, out Vector2Int numWorkGroupsAndMips, int mips = -1)
+        {
+            const int tileSize = 64; // the rectangle is split into 64x64 tiles, one thread group per tile
+            int firstTileX = rectInfo.x / tileSize;
+            int firstTileY = rectInfo.y / tileSize;
+            int lastTileX = (rectInfo.x + rectInfo.width - 1) / tileSize;
+            int lastTileY = (rectInfo.y + rectInfo.height - 1) / tileSize;
+
+            workGroupOffset = new Vector2Int(firstTileX, firstTileY);
+            dispatchThreadGroupCount = new Vector2Int(lastTileX + 1 - firstTileX, lastTileY + 1 - firstTileY);
+
+            // A negative mips argument means: derive the count from the longest side, capped at 12
+            float longestSide = Math.Max(rectInfo.width, rectInfo.height);
+            int mipCount = mips < 0 ? Math.Min(Mathf.FloorToInt(Mathf.Log(longestSide, 2.0f)), 12) : mips;
+            numWorkGroupsAndMips = new Vector2Int(dispatchThreadGroupCount.x * dispatchThreadGroupCount.y, mipCount);
+        }
+
+        /// <summary>Logs an error or warning for each dispatch parameter that is missing or outside its expected range; it only logs and never throws.</summary>
+        private void DebugCheckDispatch(Fsr3Upscaler.DispatchDescription dispatchParams)
+        {
+            if (!dispatchParams.Color.IsValid)
+            {
+                Debug.LogError("Color resource is null");
+            }
+            
+            if (!dispatchParams.Depth.IsValid)
+            {
+                Debug.LogError("Depth resource is null");
+            }
+            
+            if (!dispatchParams.MotionVectors.IsValid)
+            {
+                Debug.LogError("MotionVectors resource is null");
+            }
+            
+            if (!dispatchParams.Output.IsValid)
+            {
+                Debug.LogError("Output resource is null");
+            }
+            
+            if (dispatchParams.Exposure.IsValid && (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableAutoExposure) != 0)
+            {
+                Debug.LogWarning("Exposure resource provided, however auto exposure flag is present");
+            }
+
+            if (Mathf.Abs(dispatchParams.JitterOffset.x) > 1.0f || Mathf.Abs(dispatchParams.JitterOffset.y) > 1.0f)
+            {
+                Debug.LogWarning("JitterOffset contains value outside of expected range [-1.0, 1.0]");
+            }
+
+            if (dispatchParams.MotionVectorScale.x > _contextDescription.MaxRenderSize.x || dispatchParams.MotionVectorScale.y > _contextDescription.MaxRenderSize.y)
+            {
+                Debug.LogWarning("MotionVectorScale contains scale value greater than MaxRenderSize");
+            }
+
+            if (dispatchParams.MotionVectorScale.x == 0.0f || dispatchParams.MotionVectorScale.y == 0.0f)
+            {
+                Debug.LogWarning("MotionVectorScale contains zero scale value");
+            }
+
+            if (dispatchParams.RenderSize.x > _contextDescription.MaxRenderSize.x || dispatchParams.RenderSize.y > _contextDescription.MaxRenderSize.y)
+            {
+                Debug.LogWarning("RenderSize is greater than context MaxRenderSize");
+            }
+
+            if (dispatchParams.RenderSize.x == 0 || dispatchParams.RenderSize.y == 0)
+            {
+                Debug.LogWarning("RenderSize contains zero dimension");
+            }
+
+            if (dispatchParams.FrameTimeDelta > 1.0f)
+            {
+                Debug.LogWarning("FrameTimeDelta is greater than 1.0f - this value should be seconds (~0.0166 for 60fps)");
+            }
+
+            if (dispatchParams.PreExposure == 0.0f)
+            {
+                Debug.LogError("PreExposure provided as 0.0f which is invalid");
+            }
+
+            bool infiniteDepth = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInfinite) != 0;
+            bool inverseDepth = (_contextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0;
+            if (inverseDepth) // with inverted depth the checks below expect CameraNear to hold the larger value
+            {
+                if (dispatchParams.CameraNear < dispatchParams.CameraFar)
+                {
+                    Debug.LogWarning("EnableDepthInverted flag is present yet CameraNear is less than CameraFar");
+                }
+
+                if (infiniteDepth)
+                {
+                    if (dispatchParams.CameraNear < float.MaxValue)
+                    {
+                        Debug.LogWarning("EnableDepthInfinite and EnableDepthInverted present, yet CameraNear != float.MaxValue");
+                    }
+                }
+                
+                if (dispatchParams.CameraFar < 0.075f)
+                {
+                    Debug.LogWarning("EnableDepthInverted present, CameraFar value is very low which may result in depth separation artefacting");
+                }
+            }
+            else
+            {
+                if (dispatchParams.CameraNear > dispatchParams.CameraFar)
+                {
+                    Debug.LogWarning("CameraNear is greater than CameraFar in non-inverted-depth context");
+                }
+
+                if (infiniteDepth)
+                {
+                    if (dispatchParams.CameraFar < float.MaxValue)
+                    {
+                        Debug.LogWarning("EnableDepthInfinite present, yet CameraFar != float.MaxValue");
+                    }
+                }
+
+                if (dispatchParams.CameraNear < 0.075f)
+                {
+                    Debug.LogWarning("CameraNear value is very low which may result in depth separation artefacting");
+                }
+            }
+
+            if (dispatchParams.CameraFovAngleVertical <= 0.0f) // also fires for negative angles, not only exactly 0
+            {
+                Debug.LogError("CameraFovAngleVertical is 0.0f - this value should be > 0.0f");
+            }
+
+            if (dispatchParams.CameraFovAngleVertical > Mathf.PI) // the angle is compared against PI, i.e. it is in radians
+            {
+                Debug.LogError("CameraFovAngleVertical is greater than 180 degrees/PI");
+            }
+        }
+
+        /// <summary>
+        /// The FSR3 C++ codebase uses floats bitwise converted to ints to pass sharpness parameters to the RCAS shader.
+        /// This is not possible in C# without enabling unsafe code compilation, so to avoid that we instead use a table of precomputed values, indexed by SetupRcasConstants from the clamped sharpness.
+        /// </summary>
+        private static readonly Fsr3Upscaler.RcasConstants[] RcasConfigs = new []
+        {
+            new Fsr3Upscaler.RcasConstants(1048576000u, 872428544u), // index 0 = sharpness 0.0; first value is the bit pattern of 0.25f
+            new Fsr3Upscaler.RcasConstants(1049178080u, 877212745u),
+            new Fsr3Upscaler.RcasConstants(1049823372u, 882390168u),
+            new Fsr3Upscaler.RcasConstants(1050514979u, 887895276u),
+            new Fsr3Upscaler.RcasConstants(1051256227u, 893859143u),
+            new Fsr3Upscaler.RcasConstants(1052050675u, 900216232u),
+            new Fsr3Upscaler.RcasConstants(1052902144u, 907032080u),
+            new Fsr3Upscaler.RcasConstants(1053814727u, 914306687u),
+            new Fsr3Upscaler.RcasConstants(1054792807u, 922105590u),
+            new Fsr3Upscaler.RcasConstants(1055841087u, 930494326u),
+            new Fsr3Upscaler.RcasConstants(1056964608u, 939538432u), // index 10 = sharpness 0.5; first value is the bit pattern of 0.5f
+            new Fsr3Upscaler.RcasConstants(1057566688u, 944322633u),
+            new Fsr3Upscaler.RcasConstants(1058211980u, 949500056u),
+            new Fsr3Upscaler.RcasConstants(1058903587u, 955005164u),
+            new Fsr3Upscaler.RcasConstants(1059644835u, 960969031u),
+            new Fsr3Upscaler.RcasConstants(1060439283u, 967326120u),
+            new Fsr3Upscaler.RcasConstants(1061290752u, 974141968u),
+            new Fsr3Upscaler.RcasConstants(1062203335u, 981416575u),
+            new Fsr3Upscaler.RcasConstants(1063181415u, 989215478u),
+            new Fsr3Upscaler.RcasConstants(1064229695u, 997604214u),
+            new Fsr3Upscaler.RcasConstants(1065353216u, 1006648320), // index 20 = sharpness 1.0; first value is the bit pattern of 1.0f
+        };
+        
+        /// <summary>Allocates a one-element constant buffer whose stride is the marshalled size of TConstants.</summary>
+        private static ComputeBuffer CreateConstantBuffer<TConstants>()
+            where TConstants : struct
+            => new ComputeBuffer(1, Marshal.SizeOf<TConstants>(), ComputeBufferType.Constant);
+        
+        /// <summary>Releases the buffer if one is assigned and leaves the caller's reference null.</summary>
+        private static void DestroyConstantBuffer(ref ComputeBuffer bufferRef)
+        {
+            if (bufferRef != null)
+                bufferRef.Release();
+
+            bufferRef = null;
+        }
+
+        /// <summary>Disposes the pass if one is assigned and leaves the caller's reference null.</summary>
+        private static void DestroyPass(ref Fsr3UpscalerPass pass)
+        {
+            if (pass != null)
+                pass.Dispose();
+
+            pass = null;
+        }
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs.meta
new file mode 100644
index 00000000..673b2ef1
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerContext.cs.meta
@@ -0,0 +1,3 @@
+﻿fileFormatVersion: 2
+guid: 14c8dc4c7c3e4ac418e50a859cec0b2f
+timeCreated: 1673442225
\ No newline at end of file
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs
new file mode 100644
index 00000000..e1dc2253
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs
@@ -0,0 +1,372 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+using System.Runtime.InteropServices;
+using UnityEngine;
+using UnityEngine.Rendering;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// Base class for all of the compute passes that make up the FSR3 Upscaler process.
+    /// This loosely matches the FfxPipelineState struct from the original FSR3 codebase, wrapped in an object-oriented blanket.
+    /// These classes are responsible for loading compute shaders, managing temporary resources, binding resources to shader kernels and dispatching said shaders.
+    /// </summary>
+    internal abstract class Fsr3UpscalerPass: IDisposable
+    {
+        internal const int ShadingChangeMipLevel = 4;   // This matches the FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL define
+
+        /// <summary>Context settings (flags, shader references) this pass was created with.</summary>
+        protected readonly Fsr3Upscaler.ContextDescription ContextDescription;
+        /// <summary>Persistent resources shared by the passes.</summary>
+        protected readonly Fsr3UpscalerResources Resources;
+        /// <summary>Main upscaler constant buffer; may be null for passes that do not bind it.</summary>
+        protected readonly ComputeBuffer Constants;
+
+        /// <summary>Compute shader used by this pass, assigned by <c>InitComputeShader</c>.</summary>
+        protected ComputeShader ComputeShader;
+        /// <summary>Index of the "CS" kernel inside <see cref="ComputeShader"/>.</summary>
+        protected int KernelIndex;
+
+        /// <summary>
+        /// Stores the shared context description, resources and constant buffer. Derived constructors are expected to call <c>InitComputeShader</c>.
+        /// </summary>
+        protected Fsr3UpscalerPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants)
+        {
+            ContextDescription = contextDescription;
+            Resources = resources;
+            Constants = constants;
+        }
+
+        /// <summary>
+        /// Releases anything owned by the pass. The base implementation owns nothing and does nothing.
+        /// </summary>
+        public virtual void Dispose()
+        {
+        }
+
+        /// <summary>
+        /// Records the resource bindings and the compute dispatch for this pass into the given command buffer.
+        /// </summary>
+        /// <param name="commandBuffer">Command buffer to record into.</param>
+        /// <param name="dispatchParams">Per-dispatch inputs and outputs.</param>
+        /// <param name="frameIndex">Index selecting the current entry of the double-buffered resources.</param>
+        /// <param name="dispatchX">Number of thread groups in X.</param>
+        /// <param name="dispatchY">Number of thread groups in Y.</param>
+        public abstract void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY);
+
+        /// <summary>
+        /// Initializes the compute shader for this pass using the flags from the stored context description.
+        /// </summary>
+        /// <param name="passName">Human-readable pass name, used only in the error message.</param>
+        /// <param name="shader">Compute shader asset for this pass.</param>
+        /// <exception cref="MissingReferenceException">Thrown when <paramref name="shader"/> is null.</exception>
+        protected void InitComputeShader(string passName, ComputeShader shader)
+        {
+            InitComputeShader(passName, shader, ContextDescription.Flags);
+        }
+
+        /// <summary>
+        /// Assigns the shader, looks up its "CS" kernel and configures the permutation keywords from <paramref name="flags"/>.
+        /// </summary>
+        private void InitComputeShader(string passName, ComputeShader shader, Fsr3Upscaler.InitializationFlags flags)
+        {
+            if (shader == null)
+            {
+                throw new MissingReferenceException($"Shader for FSR3 Upscaler '{passName}' could not be loaded! Please ensure it is included in the project correctly.");
+            }
+
+            ComputeShader = shader;
+            KernelIndex = ComputeShader.FindKernel("CS");
+
+            bool useLut = false;
+#if UNITY_2022_1_OR_NEWER   // This will also work in 2020.3.43+ and 2021.3.14+ 
+            if (SystemInfo.computeSubGroupSize == 64)
+            {
+                useLut = true;
+            }
+#endif
+
+            // This matches the permutation rules from the CreatePipeline* functions.
+            // Every keyword is set explicitly (enabled OR disabled): the shader object is a shared asset reference,
+            // so keywords enabled by a previously created context would otherwise linger and select the wrong permutation.
+            SetShaderKeyword("FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT", (flags & Fsr3Upscaler.InitializationFlags.EnableHighDynamicRange) != 0);
+            SetShaderKeyword("FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS", (flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) == 0);
+            SetShaderKeyword("FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS", (flags & Fsr3Upscaler.InitializationFlags.EnableMotionVectorsJitterCancellation) != 0);
+            SetShaderKeyword("FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH", (flags & Fsr3Upscaler.InitializationFlags.EnableDepthInverted) != 0);
+            SetShaderKeyword("FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE", useLut);
+            SetShaderKeyword("FFX_HALF", (flags & Fsr3Upscaler.InitializationFlags.EnableFP16Usage) != 0);
+
+            // Inform the shader which render pipeline we're currently using
+            var pipeline = GraphicsSettings.currentRenderPipeline;
+            bool isHdrp = pipeline != null && pipeline.GetType().Name.Contains("HDRenderPipeline");
+            SetShaderKeyword("UNITY_FSR3UPSCALER_HDRP", isHdrp);
+        }
+
+        /// <summary>
+        /// Enables or disables a keyword on this pass's compute shader so its state always reflects <paramref name="enabled"/>.
+        /// </summary>
+        private void SetShaderKeyword(string keyword, bool enabled)
+        {
+            if (enabled)
+            {
+                ComputeShader.EnableKeyword(keyword);
+            }
+            else
+            {
+                ComputeShader.DisableKeyword(keyword);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "compute_luminance_pyramid_pass" shader.
+    /// Binds the input color, the SPD atomic counter, two mip levels of the scene luminance texture and the auto-exposure texture.
+    /// </summary>
+    internal class Fsr3UpscalerComputeLuminancePyramidPass : Fsr3UpscalerPass
+    {
+        // Constant buffer bound to the CbSpd slot on every dispatch.
+        private readonly ComputeBuffer _spdConstants;
+
+        public Fsr3UpscalerComputeLuminancePyramidPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer spdConstants)
+            : base(contextDescription, resources, constants)
+        {
+            _spdConstants = spdConstants;
+
+            InitComputeShader("compute_luminance_pyramid_pass", contextDescription.Shaders.computeLuminancePyramidPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the luminance pyramid pass. <paramref name="frameIndex"/> is not used by this pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+
+            // Input
+            var inputColor = dispatchParams.Color;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputColor, inputColor.RenderTarget, inputColor.MipLevel, inputColor.SubElement);
+
+            // Outputs
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavSpdAtomicCount, Resources.SpdAtomicCounter);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavExposureMipLumaChange, Resources.SceneLuminance, ShadingChangeMipLevel);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavExposureMip5, Resources.SceneLuminance, 5);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavAutoExposure, Resources.AutoExposure);
+
+            // Constants
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            int spdConstantsSize = Marshal.SizeOf<Fsr3Upscaler.SpdConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbSpd, _spdConstants, 0, spdConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "reconstruct_previous_depth_pass" shader.
+    /// Binds the color, depth, motion vector and exposure inputs, and the current frame's dilated motion vectors texture.
+    /// </summary>
+    internal class Fsr3UpscalerReconstructPreviousDepthPass : Fsr3UpscalerPass
+    {
+        public Fsr3UpscalerReconstructPreviousDepthPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants)
+            : base(contextDescription, resources, constants)
+        {
+            InitComputeShader("reconstruct_previous_depth_pass", contextDescription.Shaders.reconstructPreviousDepthPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the previous-depth reconstruction pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+
+            // Inputs supplied through the dispatch description
+            var inputColor = dispatchParams.Color;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputColor, inputColor.RenderTarget, inputColor.MipLevel, inputColor.SubElement);
+
+            var inputDepth = dispatchParams.Depth;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputDepth, inputDepth.RenderTarget, inputDepth.MipLevel, inputDepth.SubElement);
+
+            var inputMotion = dispatchParams.MotionVectors;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputMotionVectors, inputMotion.RenderTarget, inputMotion.MipLevel, inputMotion.SubElement);
+
+            var inputExposure = dispatchParams.Exposure;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputExposure, inputExposure.RenderTarget, inputExposure.MipLevel, inputExposure.SubElement);
+
+            // Output: dilated motion vectors for the current frame
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
+
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+    
+    /// <summary>
+    /// Pass that dispatches the "depth_clip_pass" shader.
+    /// Binds the application inputs (color, depth, motion vectors, exposure, reactive and transparency/composition masks)
+    /// together with the reconstructed depth, dilated depth and the current and previous dilated motion vectors.
+    /// </summary>
+    internal class Fsr3UpscalerDepthClipPass : Fsr3UpscalerPass
+    {
+        public Fsr3UpscalerDepthClipPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants)
+            : base(contextDescription, resources, constants)
+        {
+            InitComputeShader("depth_clip_pass", contextDescription.Shaders.depthClipPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the depth clip pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+            int previousFrameIndex = frameIndex ^ 1;
+
+            // Inputs supplied through the dispatch description
+            var inputColor = dispatchParams.Color;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputColor, inputColor.RenderTarget, inputColor.MipLevel, inputColor.SubElement);
+
+            var inputDepth = dispatchParams.Depth;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputDepth, inputDepth.RenderTarget, inputDepth.MipLevel, inputDepth.SubElement);
+
+            var inputMotion = dispatchParams.MotionVectors;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputMotionVectors, inputMotion.RenderTarget, inputMotion.MipLevel, inputMotion.SubElement);
+
+            var inputExposure = dispatchParams.Exposure;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputExposure, inputExposure.RenderTarget, inputExposure.MipLevel, inputExposure.SubElement);
+
+            var reactiveMask = dispatchParams.Reactive;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvReactiveMask, reactiveMask.RenderTarget, reactiveMask.MipLevel, reactiveMask.SubElement);
+
+            var compositionMask = dispatchParams.TransparencyAndComposition;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvTransparencyAndCompositionMask, compositionMask.RenderTarget, compositionMask.MipLevel, compositionMask.SubElement);
+
+            // Internal resources; the Uav* shader IDs are passed here as the texture identifiers to read from
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvReconstructedPrevNearestDepth, Fsr3ShaderIDs.UavReconstructedPrevNearestDepth);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvDilatedDepth, Fsr3ShaderIDs.UavDilatedDepth);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvPrevDilatedMotionVectors, Resources.DilatedMotionVectors[previousFrameIndex]);
+
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "lock_pass" shader. Binds only the lock input luma texture and the main constant buffer.
+    /// </summary>
+    internal class Fsr3UpscalerLockPass : Fsr3UpscalerPass
+    {
+        public Fsr3UpscalerLockPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants)
+            : base(contextDescription, resources, constants)
+        {
+            InitComputeShader("lock_pass", contextDescription.Shaders.lockPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the lock pass. <paramref name="dispatchParams"/> and <paramref name="frameIndex"/> are not used by this pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+
+            // The UavLockInputLuma shader ID is passed as the identifier of the texture to read from
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvLockInputLuma, Fsr3ShaderIDs.UavLockInputLuma);
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+    
+    /// <summary>
+    /// Pass that dispatches the "accumulate_pass" shader.
+    /// Toggles the sharpening keyword per dispatch, reads the previous frame's history resources (index <c>frameIndex ^ 1</c>)
+    /// and binds the current frame's history resources (index <c>frameIndex</c>) plus the upscaled output.
+    /// </summary>
+    internal class Fsr3UpscalerAccumulatePass : Fsr3UpscalerPass
+    {
+        private const string SharpeningKeyword = "FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING";
+
+#if UNITY_2021_2_OR_NEWER
+        // Local keyword handle, resolved once against this pass's compute shader.
+        private readonly LocalKeyword _sharpeningKeyword;
+#endif
+
+        public Fsr3UpscalerAccumulatePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants)
+            : base(contextDescription, resources, constants)
+        {
+            InitComputeShader("accumulate_pass", contextDescription.Shaders.accumulatePass);
+#if UNITY_2021_2_OR_NEWER
+            _sharpeningKeyword = new LocalKeyword(ComputeShader, SharpeningKeyword);
+#endif
+        }
+
+        /// <summary>
+        /// Records the keyword state, bindings and dispatch for the accumulate pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+            int previousFrameIndex = frameIndex ^ 1;
+            bool sharpen = dispatchParams.EnableSharpening;
+
+#if UNITY_2021_2_OR_NEWER
+            if (!sharpen)
+                commandBuffer.DisableKeyword(shader, _sharpeningKeyword);
+            else
+                commandBuffer.EnableKeyword(shader, _sharpeningKeyword);
+#else
+            if (!sharpen)
+                commandBuffer.DisableShaderKeyword(SharpeningKeyword);
+            else
+                commandBuffer.EnableShaderKeyword(SharpeningKeyword);
+#endif
+
+            // With display-resolution motion vectors the application's motion vectors are bound directly; otherwise the dilated ones are used
+            bool displayResolutionMotionVectors = (ContextDescription.Flags & Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors) != 0;
+            if (displayResolutionMotionVectors)
+            {
+                var inputMotion = dispatchParams.MotionVectors;
+                commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputMotionVectors, inputMotion.RenderTarget, inputMotion.MipLevel, inputMotion.SubElement);
+            }
+            else
+            {
+                commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvDilatedMotionVectors, Resources.DilatedMotionVectors[frameIndex]);
+            }
+
+            var inputExposure = dispatchParams.Exposure;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputExposure, inputExposure.RenderTarget, inputExposure.MipLevel, inputExposure.SubElement);
+
+            // Read-only inputs, including the previous frame's history
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvDilatedReactiveMasks, Fsr3ShaderIDs.UavDilatedReactiveMasks);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInternalUpscaled, Resources.InternalUpscaled[previousFrameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvLockStatus, Resources.LockStatus[previousFrameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvPreparedInputColor, Fsr3ShaderIDs.UavPreparedInputColor);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvLanczosLut, Resources.LanczosLut);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvUpscaleMaximumBiasLut, Resources.MaximumBiasLut);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvSceneLuminanceMips, Resources.SceneLuminance);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvAutoExposure, Resources.AutoExposure);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvLumaHistory, Resources.LumaHistory[previousFrameIndex]);
+
+            // Current frame's history targets
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavInternalUpscaled, Resources.InternalUpscaled[frameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavLockStatus, Resources.LockStatus[frameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavLumaHistory, Resources.LumaHistory[frameIndex]);
+
+            var upscaledOutput = dispatchParams.Output;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavUpscaledOutput, upscaledOutput.RenderTarget, upscaledOutput.MipLevel, upscaledOutput.SubElement);
+
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "rcas_pass" shader.
+    /// Reads the exposure input and the current frame's internal upscaled texture, and binds the final output texture.
+    /// </summary>
+    internal class Fsr3UpscalerSharpenPass : Fsr3UpscalerPass
+    {
+        // Constant buffer bound to the CbRcas slot on every dispatch.
+        private readonly ComputeBuffer _rcasConstants;
+
+        public Fsr3UpscalerSharpenPass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer rcasConstants)
+            : base(contextDescription, resources, constants)
+        {
+            _rcasConstants = rcasConstants;
+
+            InitComputeShader("rcas_pass", contextDescription.Shaders.sharpenPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the sharpening pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+
+            // Inputs
+            var inputExposure = dispatchParams.Exposure;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputExposure, inputExposure.RenderTarget, inputExposure.MipLevel, inputExposure.SubElement);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvRcasInput, Resources.InternalUpscaled[frameIndex]);
+
+            // Output
+            var upscaledOutput = dispatchParams.Output;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavUpscaledOutput, upscaledOutput.RenderTarget, upscaledOutput.MipLevel, upscaledOutput.SubElement);
+
+            // Constants
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            int rcasConstantsSize = Marshal.SizeOf<Fsr3Upscaler.RcasConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbRcas, _rcasConstants, 0, rcasConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "autogen_reactive_pass" shader.
+    /// It passes no main constant buffer to the base class and is driven through its own
+    /// <see cref="ScheduleDispatch(CommandBuffer, Fsr3Upscaler.GenerateReactiveDescription, int, int)"/> overload.
+    /// </summary>
+    internal class Fsr3UpscalerGenerateReactivePass : Fsr3UpscalerPass
+    {
+        // Constant buffer bound to the CbGenReactive slot on every dispatch.
+        private readonly ComputeBuffer _generateReactiveConstants;
+
+        public Fsr3UpscalerGenerateReactivePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer generateReactiveConstants)
+            : base(contextDescription, resources, null)
+        {
+            _generateReactiveConstants = generateReactiveConstants;
+
+            InitComputeShader("autogen_reactive_pass", contextDescription.Shaders.autoGenReactivePass);
+        }
+
+        /// <summary>
+        /// No-op: this overload records nothing. Use the overload that takes a GenerateReactiveDescription instead.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for generating a reactive mask from the opaque-only and pre-upscale color inputs.
+        /// </summary>
+        public void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.GenerateReactiveDescription dispatchParams, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+
+            var opaqueOnlyColor = dispatchParams.ColorOpaqueOnly;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvOpaqueOnly, opaqueOnlyColor.RenderTarget, opaqueOnlyColor.MipLevel, opaqueOnlyColor.SubElement);
+
+            var preUpscaleColor = dispatchParams.ColorPreUpscale;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputColor, preUpscaleColor.RenderTarget, preUpscaleColor.MipLevel, preUpscaleColor.SubElement);
+
+            var reactiveOutput = dispatchParams.OutReactive;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavAutoReactive, reactiveOutput.RenderTarget, reactiveOutput.MipLevel, reactiveOutput.SubElement);
+
+            int reactiveConstantsSize = Marshal.SizeOf<Fsr3Upscaler.GenerateReactiveConstants>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbGenReactive, _generateReactiveConstants, 0, reactiveConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+
+    /// <summary>
+    /// Pass that dispatches the "tcr_autogen_pass" shader.
+    /// Reads the application inputs and the previous frame's pre/post-alpha color (index <c>frameIndex ^ 1</c>),
+    /// and binds the auto-reactive, auto-composition and current frame's pre/post-alpha color textures.
+    /// </summary>
+    internal class Fsr3UpscalerTcrAutogeneratePass : Fsr3UpscalerPass
+    {
+        // Constant buffer bound to the CbGenReactive slot on every dispatch.
+        private readonly ComputeBuffer _tcrAutogenerateConstants;
+
+        public Fsr3UpscalerTcrAutogeneratePass(Fsr3Upscaler.ContextDescription contextDescription, Fsr3UpscalerResources resources, ComputeBuffer constants, ComputeBuffer tcrAutogenerateConstants)
+            : base(contextDescription, resources, constants)
+        {
+            _tcrAutogenerateConstants = tcrAutogenerateConstants;
+
+            InitComputeShader("tcr_autogen_pass", contextDescription.Shaders.tcrAutoGenPass);
+        }
+
+        /// <summary>
+        /// Records the bindings and dispatch for the transparency/composition/reactive auto-generation pass.
+        /// </summary>
+        public override void ScheduleDispatch(CommandBuffer commandBuffer, Fsr3Upscaler.DispatchDescription dispatchParams, int frameIndex, int dispatchX, int dispatchY)
+        {
+            var shader = ComputeShader;
+            int kernel = KernelIndex;
+            int previousFrameIndex = frameIndex ^ 1;
+
+            // Inputs
+            var opaqueOnlyColor = dispatchParams.ColorOpaqueOnly;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvOpaqueOnly, opaqueOnlyColor.RenderTarget, opaqueOnlyColor.MipLevel, opaqueOnlyColor.SubElement);
+
+            var inputColor = dispatchParams.Color;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputColor, inputColor.RenderTarget, inputColor.MipLevel, inputColor.SubElement);
+
+            var inputMotion = dispatchParams.MotionVectors;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvInputMotionVectors, inputMotion.RenderTarget, inputMotion.MipLevel, inputMotion.SubElement);
+
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvPrevColorPreAlpha, Resources.PrevPreAlpha[previousFrameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvPrevColorPostAlpha, Resources.PrevPostAlpha[previousFrameIndex]);
+
+            var reactiveMask = dispatchParams.Reactive;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvReactiveMask, reactiveMask.RenderTarget, reactiveMask.MipLevel, reactiveMask.SubElement);
+
+            var compositionMask = dispatchParams.TransparencyAndComposition;
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.SrvTransparencyAndCompositionMask, compositionMask.RenderTarget, compositionMask.MipLevel, compositionMask.SubElement);
+
+            // Outputs
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavAutoReactive, Resources.AutoReactive);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavAutoComposition, Resources.AutoComposition);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavPrevColorPreAlpha, Resources.PrevPreAlpha[frameIndex]);
+            commandBuffer.SetComputeTextureParam(shader, kernel, Fsr3ShaderIDs.UavPrevColorPostAlpha, Resources.PrevPostAlpha[frameIndex]);
+
+            // Constants
+            int upscalerConstantsSize = Marshal.SizeOf<Fsr3Upscaler.UpscalerConstants>();
+            int tcrConstantsSize = Marshal.SizeOf<Fsr3Upscaler.GenerateReactiveConstants2>();
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbFsr3Upscaler, Constants, 0, upscalerConstantsSize);
+            commandBuffer.SetComputeConstantBufferParam(shader, Fsr3ShaderIDs.CbGenReactive, _tcrAutogenerateConstants, 0, tcrConstantsSize);
+
+            commandBuffer.DispatchCompute(shader, kernel, dispatchX, dispatchY, 1);
+        }
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs.meta
new file mode 100644
index 00000000..8ffd58ec
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerPass.cs.meta
@@ -0,0 +1,3 @@
+﻿fileFormatVersion: 2
+guid: cfd077da533b192458b0b548668776e7
+timeCreated: 1676885169
\ No newline at end of file
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs
new file mode 100644
index 00000000..676867b3
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs
@@ -0,0 +1,307 @@
+﻿// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+using UnityEngine;
+using UnityEngine.Experimental.Rendering;
+using UnityEngine.Rendering;
+
+namespace FidelityFX
+{
+    /// <summary>
+    /// Helper class for bundling and managing persistent resources required by the FSR3 Upscaler process.
+    /// This includes lookup tables, default fallback resources and double-buffered resources that get swapped between frames.
+    /// </summary>
+    internal class Fsr3UpscalerResources
+    {
+        // Read-only textures that Create() fills once and uploads with Apply().
+        public Texture2D DefaultExposure;
+        public Texture2D DefaultReactive;
+        public Texture2D LanczosLut;
+        public Texture2D MaximumBiasLut;
+
+        // Random-write render textures allocated by Create().
+        public RenderTexture SpdAtomicCounter;
+        public RenderTexture AutoExposure;
+        public RenderTexture SceneLuminance;
+
+        // Random-write render textures allocated by CreateTcrAutogenResources().
+        public RenderTexture AutoReactive;
+        public RenderTexture AutoComposition;
+
+        // Texture pairs; both elements are allocated by CreateDoubleBufferedResource().
+        public readonly RenderTexture[] DilatedMotionVectors = new RenderTexture[2];
+        public readonly RenderTexture[] LockStatus = new RenderTexture[2];
+        public readonly RenderTexture[] InternalUpscaled = new RenderTexture[2];
+        public readonly RenderTexture[] LumaHistory = new RenderTexture[2];
+        public readonly RenderTexture[] PrevPreAlpha = new RenderTexture[2];
+        public readonly RenderTexture[] PrevPostAlpha = new RenderTexture[2];
+
+        /// <summary>
+        /// Builds the lookup tables and default fallback textures and allocates the persistent render textures.
+        /// Everything allocated here is released again by <see cref="Destroy"/>.
+        /// </summary>
+        /// <param name="contextDescription">Provides MaxRenderSize and DisplaySize, which determine the texture dimensions.</param>
+        public void Create(Fsr3Upscaler.ContextDescription contextDescription)
+        {
+            // Generate the data for the LUT
+            const int lanczos2LutWidth = 128;
+            float[] lanczos2Weights = new float[lanczos2LutWidth];
+            for (int currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; ++currentLanczosWidthIndex)
+            {
+                float x = 2.0f * currentLanczosWidthIndex / (lanczos2LutWidth - 1);
+                float y = Fsr3Upscaler.Lanczos2(x);
+                lanczos2Weights[currentLanczosWidthIndex] = y;
+            }
+
+            // Halve the bias table entries (none exceed 2.0) so the uploaded floats are pre-normalized
+            float[] maximumBias = new float[MaximumBiasTextureWidth * MaximumBiasTextureHeight];
+            for (int i = 0; i < maximumBias.Length; ++i)
+            {
+                maximumBias[i] = MaximumBias[i] / 2.0f;
+            }
+
+            // Resource FSR3UPSCALER_LanczosLutData: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE
+            // R16_SNorm textures are not supported by Unity on most platforms, strangely enough. So instead we use R32_SFloat and upload pre-normalized float data.
+            LanczosLut = new Texture2D(lanczos2LutWidth, 1, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_LanczosLutData" };
+            LanczosLut.SetPixelData(lanczos2Weights, 0);
+            LanczosLut.Apply();
+
+            // Resource FSR3UPSCALER_MaximumUpsampleBias: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R16_SNORM, FFX_RESOURCE_FLAGS_NONE
+            MaximumBiasLut = new Texture2D(MaximumBiasTextureWidth, MaximumBiasTextureHeight, GraphicsFormat.R32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_MaximumUpsampleBias" };
+            MaximumBiasLut.SetPixelData(maximumBias, 0);
+            MaximumBiasLut.Apply();
+
+            // Resource FSR3UPSCALER_DefaultExposure: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            DefaultExposure = new Texture2D(1, 1, GraphicsFormat.R32G32_SFloat, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultExposure" };
+            DefaultExposure.SetPixel(0, 0, Color.clear);
+            DefaultExposure.Apply();
+
+            // Resource FSR3UPSCALER_DefaultReactivityMask: FFX_RESOURCE_USAGE_READ_ONLY, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE
+            DefaultReactive = new Texture2D(1, 1, GraphicsFormat.R8_UNorm, TextureCreationFlags.None) { name = "FSR3UPSCALER_DefaultReactivityMask" };
+            DefaultReactive.SetPixel(0, 0, Color.clear);
+            DefaultReactive.Apply();
+
+            // Resource FSR3UPSCALER_SpdAtomicCounter: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE
+            // Despite what the original FSR3 codebase says, this resource really isn't aliasable. Resetting this counter to 0 every frame breaks auto-exposure on MacOS Metal.
+            SpdAtomicCounter = new RenderTexture(1, 1, 0, GraphicsFormat.R32_UInt) { name = "FSR3UPSCALER_SpdAtomicCounter", enableRandomWrite = true };
+            SpdAtomicCounter.Create();
+
+            // Resource FSR3UPSCALER_AutoExposure: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32G32_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            AutoExposure = new RenderTexture(1, 1, 0, GraphicsFormat.R32G32_SFloat) { name = "FSR3UPSCALER_AutoExposure", enableRandomWrite = true };
+            AutoExposure.Create();
+
+            // Resource FSR3UPSCALER_ExposureMips: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+            // This is a rather special case: it's an aliasable resource, but because we require a mipmap chain and bind specific mip levels per shader, we can't easily use temporary RTs for this.
+            int w = contextDescription.MaxRenderSize.x / 2, h = contextDescription.MaxRenderSize.y / 2;
+            int mipCount = 1 + Mathf.FloorToInt(Mathf.Log(Math.Max(w, h), 2.0f));
+            SceneLuminance = new RenderTexture(w, h, 0, GraphicsFormat.R16_SFloat, mipCount) { name = "FSR3UPSCALER_ExposureMips", enableRandomWrite = true, useMipMap = true, autoGenerateMips = false };
+            SceneLuminance.Create();
+
+            // Resources FSR3UPSCALER_InternalDilatedVelocity1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(DilatedMotionVectors, "FSR3UPSCALER_InternalDilatedVelocity", contextDescription.MaxRenderSize, GraphicsFormat.R16G16_SFloat);
+
+            // Resources FSR3UPSCALER_LockStatus1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(LockStatus, "FSR3UPSCALER_LockStatus", contextDescription.DisplaySize, GraphicsFormat.R16G16_SFloat);
+
+            // Resources FSR3UPSCALER_InternalUpscaled1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(InternalUpscaled, "FSR3UPSCALER_InternalUpscaled", contextDescription.DisplaySize, GraphicsFormat.R16G16B16A16_SFloat);
+
+            // Resources FSR3UPSCALER_LumaHistory1/2: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(LumaHistory, "FSR3UPSCALER_LumaHistory", contextDescription.DisplaySize, GraphicsFormat.R8G8B8A8_UNorm);
+        }
+
+        /// <summary>
+        /// Allocates the AutoReactive and AutoComposition textures plus the PrevPreAlpha and PrevPostAlpha pairs.
+        /// They are released by <see cref="DestroyTcrAutogenResources"/>.
+        /// </summary>
+        /// <param name="contextDescription">Provides MaxRenderSize, the size used for all of these resources.</param>
+        public void CreateTcrAutogenResources(Fsr3Upscaler.ContextDescription contextDescription)
+        {
+            // Resource FSR3UPSCALER_AutoReactive: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE
+            AutoReactive = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoReactive", enableRandomWrite = true };
+            AutoReactive.Create();
+
+            // Resource FSR3UPSCALER_AutoComposition: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_NONE
+            AutoComposition = new RenderTexture(contextDescription.MaxRenderSize.x, contextDescription.MaxRenderSize.y, 0, GraphicsFormat.R8_UNorm) { name = "FSR3UPSCALER_AutoComposition", enableRandomWrite = true };
+            AutoComposition.Create();
+
+            // Resources FSR3UPSCALER_PrevPreAlpha1/2: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(PrevPreAlpha, "FSR3UPSCALER_PrevPreAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32);
+
+            // Resources FSR3UPSCALER_PrevPostAlpha1/2: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R11G11B10_FLOAT, FFX_RESOURCE_FLAGS_NONE
+            CreateDoubleBufferedResource(PrevPostAlpha, "FSR3UPSCALER_PrevPostAlpha", contextDescription.MaxRenderSize, GraphicsFormat.B10G11R11_UFloatPack32);
+        }
+
+        /// <summary>
+        /// Requests the shared aliasable resources (temporary render textures) on the given command buffer.
+        /// These do not need to persist between frames, but they do need to be available between passes.
+        /// </summary>
+        /// <param name="commandBuffer">Command buffer the GetTemporaryRT requests are recorded on.</param>
+        /// <param name="contextDescription">Provides DisplaySize and MaxRenderSize for the temporary textures.</param>
+        /// <param name="dispatchParams">Not read by this method.</param>
+        public static void CreateAliasableResources(CommandBuffer commandBuffer, Fsr3Upscaler.ContextDescription contextDescription, Fsr3Upscaler.DispatchDescription dispatchParams)
+        {
+            Vector2Int displaySize = contextDescription.DisplaySize;
+            Vector2Int maxRenderSize = contextDescription.MaxRenderSize;
+
+            // FSR3UPSCALER_ReconstructedPrevNearestDepth: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_UINT, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_UInt, 1, true);
+
+            // FSR3UPSCALER_DilatedDepth: FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R32_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavDilatedDepth, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R32_SFloat, 1, true);
+
+            // FSR3UPSCALER_LockInputLuma: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavLockInputLuma, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16_SFloat, 1, true);
+
+            // FSR3UPSCALER_DilatedReactiveMasks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8G8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R8G8_UNorm, 1, true);
+
+            // FSR3UPSCALER_PreparedInputColor: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavPreparedInputColor, maxRenderSize.x, maxRenderSize.y, 0, default, GraphicsFormat.R16G16B16A16_SFloat, 1, true);
+
+            // FSR3UPSCALER_NewLocks: FFX_RESOURCE_USAGE_UAV, FFX_SURFACE_FORMAT_R8_UNORM, FFX_RESOURCE_FLAGS_ALIASABLE
+            commandBuffer.GetTemporaryRT(Fsr3ShaderIDs.UavNewLocks, displaySize.x, displaySize.y, 0, default, GraphicsFormat.R8_UNorm, 1, true);
+        }
+
+        /// <summary>
+        /// Releases every temporary render texture requested by <see cref="CreateAliasableResources"/>.
+        /// </summary>
+        /// <param name="commandBuffer">Command buffer the ReleaseTemporaryRT calls are recorded on.</param>
+        public static void DestroyAliasableResources(CommandBuffer commandBuffer)
+        {
+            // Release all of the aliasable resources used this frame
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavReconstructedPrevNearestDepth);
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavDilatedDepth);
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavLockInputLuma);
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavDilatedReactiveMasks);
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavPreparedInputColor);
+            commandBuffer.ReleaseTemporaryRT(Fsr3ShaderIDs.UavNewLocks);
+        }
+
+        /// <summary>
+        /// Allocates both elements of a two-texture resource with random write access enabled.
+        /// The textures are named by appending 1 and 2 to <paramref name="name"/>.
+        /// </summary>
+        private static void CreateDoubleBufferedResource(RenderTexture[] resource, string name, Vector2Int size, GraphicsFormat format)
+        {
+            for (int i = 0; i < 2; ++i)
+            {
+                resource[i] = new RenderTexture(size.x, size.y, 0, format) { name = name + (i + 1), enableRandomWrite = true };
+                resource[i].Create();
+            }
+        }
+
+        /// <summary>
+        /// Releases everything allocated by <see cref="Create"/> and <see cref="CreateTcrAutogenResources"/>.
+        /// Resources that were never created (or were already destroyed) are skipped.
+        /// </summary>
+        public void Destroy()
+        {
+            DestroyTcrAutogenResources();
+
+            DestroyResource(LumaHistory);
+            DestroyResource(InternalUpscaled);
+            DestroyResource(LockStatus);
+            DestroyResource(DilatedMotionVectors);
+            DestroyResource(ref SceneLuminance);
+            DestroyResource(ref AutoExposure);
+            // Create() allocates this counter as well, so it has to be released here to avoid leaking it
+            DestroyResource(ref SpdAtomicCounter);
+            DestroyResource(ref DefaultReactive);
+            DestroyResource(ref DefaultExposure);
+            DestroyResource(ref MaximumBiasLut);
+            DestroyResource(ref LanczosLut);
+        }
+
+        /// <summary>
+        /// Releases the resources allocated by <see cref="CreateTcrAutogenResources"/>.
+        /// </summary>
+        public void DestroyTcrAutogenResources()
+        {
+            DestroyResource(PrevPostAlpha);
+            DestroyResource(PrevPreAlpha);
+            DestroyResource(ref AutoComposition);
+            DestroyResource(ref AutoReactive);
+        }
+
+        /// <summary>
+        /// Destroys a texture object and clears the reference; does nothing if it is already null.
+        /// </summary>
+        private static void DestroyResource(ref Texture2D resource)
+        {
+            if (resource == null)
+                return;
+
+            // Object.Destroy is deferred and is not carried out in edit mode,
+            // so fall back to DestroyImmediate whenever the application is not playing.
+            if (Application.isPlaying)
+                UnityEngine.Object.Destroy(resource);
+            else
+                UnityEngine.Object.DestroyImmediate(resource);
+
+            resource = null;
+        }
+
+        /// <summary>
+        /// Calls Release() on a render texture and clears the reference; does nothing if it is already null.
+        /// </summary>
+        private static void DestroyResource(ref RenderTexture resource)
+        {
+            if (resource == null)
+                return;
+
+            resource.Release();
+            resource = null;
+        }
+
+        /// <summary>
+        /// Releases every element of a render texture array and nulls the array slots.
+        /// </summary>
+        private static void DestroyResource(RenderTexture[] resource)
+        {
+            for (int i = 0; i < resource.Length; ++i)
+                DestroyResource(ref resource[i]);
+        }
+
+        // Source data for MaximumBiasLut: a 16x16 table, halved by Create() before upload.
+        private const int MaximumBiasTextureWidth = 16;
+        private const int MaximumBiasTextureHeight = 16;
+        private static readonly float[] MaximumBias =
+        {
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.876f,	1.809f,	1.772f,	1.753f,	1.748f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.869f,	1.801f,	1.764f,	1.745f,	1.739f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.976f,	1.841f,	1.774f,	1.737f,	1.716f,	1.71f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.914f,	1.784f,	1.716f,	1.673f,	1.649f,	1.641f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.793f,	1.676f,	1.604f,	1.562f,	1.54f,	1.533f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.619f,	1.536f,	1.492f,	1.467f,	1.454f,	1.449f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.575f,	1.496f,	1.456f,	1.432f,	1.416f,	1.408f,	1.405f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.555f,	1.479f,	1.438f,	1.413f,	1.398f,	1.387f,	1.381f,	1.379f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.555f,	1.474f,	1.43f,	1.404f,	1.387f,	1.376f,	1.368f,	1.363f,	1.362f,
+            2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.575f,	1.479f,	1.43f,	1.401f,	1.382f,	1.369f,	1.36f,	1.354f,	1.351f,	1.35f,
+            2.0f,	2.0f,	1.976f,	1.914f,	1.793f,	1.619f,	1.496f,	1.438f,	1.404f,	1.382f,	1.367f,	1.357f,	1.349f,	1.344f,	1.341f,	1.34f,
+            1.876f,	1.869f,	1.841f,	1.784f,	1.676f,	1.536f,	1.456f,	1.413f,	1.387f,	1.369f,	1.357f,	1.347f,	1.341f,	1.336f,	1.333f,	1.332f,
+            1.809f,	1.801f,	1.774f,	1.716f,	1.604f,	1.492f,	1.432f,	1.398f,	1.376f,	1.36f,	1.349f,	1.341f,	1.335f,	1.33f,	1.328f,	1.327f,
+            1.772f,	1.764f,	1.737f,	1.673f,	1.562f,	1.467f,	1.416f,	1.387f,	1.368f,	1.354f,	1.344f,	1.336f,	1.33f,	1.326f,	1.323f,	1.323f,
+            1.753f,	1.745f,	1.716f,	1.649f,	1.54f,	1.454f,	1.408f,	1.381f,	1.363f,	1.351f,	1.341f,	1.333f,	1.328f,	1.323f,	1.321f,	1.32f,
+            1.748f,	1.739f,	1.71f,	1.641f,	1.533f,	1.449f,	1.405f,	1.379f,	1.362f,	1.35f,	1.34f,	1.332f,	1.327f,	1.323f,	1.32f,	1.319f,
+        };
+    }
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs.meta
new file mode 100644
index 00000000..bf03a071
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Fsr3UpscalerResources.cs.meta
@@ -0,0 +1,3 @@
+﻿fileFormatVersion: 2
+guid: ec6c0c34c7b11f041885ddee4aa72818
+timeCreated: 1677236102
\ No newline at end of file
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources.meta
new file mode 100644
index 00000000..d0892b27
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 0599fe7b8fac94a4b81d1cac815f887f
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset
new file mode 100644
index 00000000..90afd1a5
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset
@@ -0,0 +1,23 @@
+%YAML 1.1
+%TAG !u! tag:unity3d.com,2011:
+--- !u!114 &11400000
+MonoBehaviour:
+  m_ObjectHideFlags: 0
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
+  m_GameObject: {fileID: 0}
+  m_Enabled: 1
+  m_EditorHideFlags: 0
+  m_Script: {fileID: 11500000, guid: aaeb3d821f826d44b84289a2dd23f90e, type: 3}
+  m_Name: Fsr3UpscalerAssets
+  m_EditorClassIdentifier: 
+  shaders:
+    computeLuminancePyramidPass: {fileID: 7200000, guid: d253be05abcdc80428503d3e4cce3a36, type: 3}
+    reconstructPreviousDepthPass: {fileID: 7200000, guid: 4f59e5b9179d74844ae06a30ae1e0629, type: 3}
+    depthClipPass: {fileID: 7200000, guid: 20e44016ed34b0d4b8de499d1b566c69, type: 3}
+    lockPass: {fileID: 7200000, guid: a135306e6d1857e43a86ef20db2a47fe, type: 3}
+    accumulatePass: {fileID: 7200000, guid: c9b45f0ae7673694ba57a4aadfe212e9, type: 3}
+    sharpenPass: {fileID: 7200000, guid: 7aaf5cfff022de2499e9b0412f947f6c, type: 3}
+    autoGenReactivePass: {fileID: 7200000, guid: 5716b91fdaa4e9e439df6b96a796fe6e, type: 3}
+    tcrAutoGenPass: {fileID: 7200000, guid: 75cdc6ef23f08ed498d4da511923fcea, type: 3}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset.meta
new file mode 100644
index 00000000..a9844463
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/Resources/Fsr3UpscalerAssets.asset.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: fc3f2f6a88715cf4394875ecc1c77e32
+NativeFormatImporter:
+  externalObjects: {}
+  mainObjectFileID: 11400000
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute
new file mode 100644
index 00000000..5cbfb807
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute
@@ -0,0 +1,41 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING
+
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+// Ensure the correct value is defined for this keyword, as it is used to select one of multiple sampler functions
+#ifdef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#undef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 1
+#endif
+
+#include "shaders/ffx_fsr3upscaler_accumulate_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta
new file mode 100644
index 00000000..dbe5282e
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_accumulate_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: c9b45f0ae7673694ba57a4aadfe212e9
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute
new file mode 100644
index 00000000..e13c0012
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute
@@ -0,0 +1,32 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta
new file mode 100644
index 00000000..1df041bc
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_autogen_reactive_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 5716b91fdaa4e9e439df6b96a796fe6e
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute
new file mode 100644
index 00000000..d5903c02
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute
@@ -0,0 +1,42 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+// Wave operations require shader model 6.0; this can only be enabled when using DXC on D3D12
+// These pragmas are commented out by default as Unity will sometimes ignore the #if's and try to enable these features anyway.
+// Uncomment the below lines if you intend to try wave operations on DX12 with the DXC compiler.
+//#if defined(UNITY_COMPILER_DXC) && defined(SHADER_API_D3D12)
+//#pragma require WaveBasic   // Required for WaveGetLaneIndex
+//#pragma require WaveBallot  // Required for WaveReadLaneAt
+//#else
+#define FFX_SPD_NO_WAVE_OPERATIONS
+//#endif
+
+#include "shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta
new file mode 100644
index 00000000..9e002c01
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_compute_luminance_pyramid_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: d253be05abcdc80428503d3e4cce3a36
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute
new file mode 100644
index 00000000..0ccd388b
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute
@@ -0,0 +1,32 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta
new file mode 100644
index 00000000..d695f481
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_depth_clip_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 20e44016ed34b0d4b8de499d1b566c69
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute
new file mode 100644
index 00000000..e38ad99a
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute
@@ -0,0 +1,30 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+// Unity-side setup first (FFX_GPU/FFX_HLSL defines, warning suppression, platform workarounds), then the FSR3 pass source itself
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_lock_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta
new file mode 100644
index 00000000..c01e009f
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_lock_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: a135306e6d1857e43a86ef20db2a47fe
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute
new file mode 100644
index 00000000..04c2fe44
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute
@@ -0,0 +1,29 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+// Unity-side setup first (FFX_GPU/FFX_HLSL defines, warning suppression, platform workarounds), then the FSR3 pass source itself
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_rcas_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta
new file mode 100644
index 00000000..cd12641b
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_rcas_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 7aaf5cfff022de2499e9b0412f947f6c
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute
new file mode 100644
index 00000000..ee2f276e
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute
@@ -0,0 +1,33 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+// UNITY_FSR3UPSCALER_HDRP switches on the HDRP texture-array workaround in ffx_fsr3upscaler_unity_common.cginc
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+// Unity-side setup first (FFX_GPU/FFX_HLSL defines, warning suppression, platform workarounds), then the FSR3 pass source itself
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta
new file mode 100644
index 00000000..1053c34a
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_reconstruct_previous_depth_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 4f59e5b9179d74844ae06a30ae1e0629
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute
new file mode 100644
index 00000000..63389183
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute
@@ -0,0 +1,32 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma kernel CS
+
+#pragma multi_compile_local __ FFX_HALF
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+#pragma multi_compile_local __ FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+// UNITY_FSR3UPSCALER_HDRP switches on the HDRP texture-array workaround in ffx_fsr3upscaler_unity_common.cginc
+#pragma multi_compile_local __ UNITY_FSR3UPSCALER_HDRP
+// Unity-side setup first (FFX_GPU/FFX_HLSL defines, warning suppression, platform workarounds), then the FSR3 pass source itself
+#include "ffx_fsr3upscaler_unity_common.cginc"
+
+#include "shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl"
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta
new file mode 100644
index 00000000..ad42fbb7
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_tcr_autogen_pass.compute.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 75cdc6ef23f08ed498d4da511923fcea
+ComputeShaderImporter:
+  externalObjects: {}
+  preprocessorOverride: 0
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc
new file mode 100644
index 00000000..758bb0c9
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc
@@ -0,0 +1,82 @@
+// Copyright (c) 2023 Nico de Poel
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Suppress a few warnings produced by FFX's HLSL code
+#pragma warning(disable: 3078)      // Loop control variable conflicts
+#pragma warning(disable: 3203)      // Signed/unsigned mismatch
+
+#define FFX_GPU         // Compiling for GPU
+#define FFX_HLSL        // Compile for plain HLSL
+
+// Use the DXC shader compiler on modern graphics APIs to enable a few advanced features
+// The DXC-related pragmas are disabled by default, as DXC doesn't support all platforms yet and will break on some platforms when enabled.
+// Consider this to be an experimental feature. If you want to benefit from 16-bit floating point and wave operations, and don't care about supporting older graphics APIs, then it's worth a try.
+//#if defined(SHADER_API_D3D12) || defined(SHADER_API_VULKAN) || defined(SHADER_API_METAL)
+//#pragma use_dxc   // Using DXC will currently break DX11 support since DX11 and DX12 share the same shader bytecode in Unity.
+//#endif
+
+// Enable half precision data types on platforms that support it
+//#if defined(UNITY_COMPILER_DXC) && defined(FFX_HALF)
+//#pragma require Native16Bit
+//#endif
+
+// Hack to work around the lack of texture atomics on Metal. NOTE: these replacements are plain read-modify-write sequences inside a braced block, not atomic operations.
+#if defined(SHADER_API_METAL)
+#define InterlockedAdd(dest, val, orig)     { (orig) = (dest); (dest) += (val); }
+#define InterlockedMin(dest, val)           { (dest) = min((dest), (val)); }
+#define InterlockedMax(dest, val)           { (dest) = max((dest), (val)); }
+#endif
+
+// Workaround for HDRP using texture arrays for its camera buffers on some platforms
+// The below defines are copied from: Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/TextureXR.hlsl
+#if defined(UNITY_FSR3UPSCALER_HDRP)
+    // Must be in sync with C# with property useTexArray in TextureXR.cs
+    #if ((defined(SHADER_API_D3D11) || defined(SHADER_API_D3D12)) && !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_GAMECORE)) || defined(SHADER_API_PSSL) || defined(SHADER_API_VULKAN)
+        #define UNITY_TEXTURE2D_X_ARRAY_SUPPORTED
+    #endif
+
+    // Control if TEXTURE2D_X macros will expand to texture arrays
+    #if defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED) && !defined(DISABLE_TEXTURE2D_X_ARRAY)
+        #define USE_TEXTURE2D_X_AS_ARRAY
+    #endif
+
+    // Early defines for single-pass instancing
+    #if defined(STEREO_INSTANCING_ON) && defined(UNITY_TEXTURE2D_X_ARRAY_SUPPORTED)
+        #define UNITY_STEREO_INSTANCING_ENABLED
+    #endif
+
+    // Helper macros to handle XR single-pass with Texture2DArray
+    #if defined(USE_TEXTURE2D_X_AS_ARRAY)
+
+        // Only single-pass stereo instancing uses array indexing; otherwise slice 0 is always addressed
+        #if defined(UNITY_STEREO_INSTANCING_ENABLED)
+            #define SLICE_ARRAY_INDEX   unity_StereoEyeIndex
+        #else
+            #define SLICE_ARRAY_INDEX  0
+        #endif
+
+        // Declare and sample camera buffers as texture arrays: TEX2D/RWTEX2D give the resource type, POS/UV append the slice index to a 2D coordinate
+        #define UNITY_FSR3_TEX2D(type)      Texture2DArray<type>
+        #define UNITY_FSR3_RWTEX2D(type)    RWTexture2DArray<type>
+        #define UNITY_FSR3_POS(pxPos)       FfxUInt32x3(pxPos, SLICE_ARRAY_INDEX)
+        #define UNITY_FSR3_UV(uv)           FfxFloat32x3(uv, SLICE_ARRAY_INDEX)
+        // NOTE(review): the UNITY_FSR3_* macros stay undefined when texture arrays are not in use; presumably the code using them checks for that - confirm
+    #endif
+#endif
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta
new file mode 100644
index 00000000..5a68b6ce
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/ffx_fsr3upscaler_unity_common.cginc.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 3ce00ba677bb7e14bb91772fd68bfe6b
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders.meta
new file mode 100644
index 00000000..8a4ff2bd
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 8364d4f86c613ec4d999d062f5f773b8
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl
new file mode 100644
index 00000000..d2f1b322
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl
@@ -0,0 +1,79 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE                         0
+#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS                 1
+#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS                 2
+#else
+#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS                   2
+#endif
+#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED                      3
+#define FSR3UPSCALER_BIND_SRV_LOCK_STATUS                            4
+#define FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR                   5
+#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT                            6
+#define FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT               7
+#define FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS                   8
+#define FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE                          9
+#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY                           10
+
+#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED                      0
+#define FSR3UPSCALER_BIND_UAV_LOCK_STATUS                            1
+#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT                        2
+#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS                              3
+#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY                           4
+
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                            0
+
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_sample.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_upsample.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_reproject.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_accumulate.h"
+
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+
+FFX_PREFER_WAVE64
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
+{
+    // Mirror the group's row index within the number of group rows needed to cover DisplaySize().y.
+    const uint uGroupRowCount = (uint(DisplaySize().y) + FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT - 1) / FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT;
+    const uint2 uMirroredGroupId = uint2(uGroupId.x, uGroupRowCount - uGroupId.y - 1);
+    // Rebuild this thread's pixel coordinate from the mirrored group ID and hand it to the accumulation pass.
+    uint2 uPixel = uMirroredGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId;
+    Accumulate(uPixel);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta
new file mode 100644
index 00000000..80f209e7
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_accumulate_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 507ab779c38eddb429cdcedf9c108d1b
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl
new file mode 100644
index 00000000..0d6e2ebb
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl
@@ -0,0 +1,77 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY                     0
+#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR                           1
+
+#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE                          0
+
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                                   0
+#define FSR3UPSCALER_BIND_CB_REACTIVE                               1
+
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
+{
+    // Pixel coordinate handled by this thread.
+    uint2 uPixel = uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId;
+    // The two colors being compared: the opaque-only input and the regular input color.
+    float3 opaqueColor = LoadOpaqueOnly( FFX_MIN16_I2(uPixel) ).rgb;
+    float3 finalColor = LoadInputColor(uPixel).rgb;
+
+    // Optional remapping of both colors, selected by the reactive generation flags.
+    if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP)
+    {
+        opaqueColor = Tonemap(opaqueColor);
+        finalColor = Tonemap(finalColor);
+    }
+    if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP)
+    {
+        opaqueColor = InverseTonemap(opaqueColor);
+        finalColor = InverseTonemap(finalColor);
+    }
+
+    // Reactive value: size of the color difference (largest component or vector length), scaled, then optionally turned into a binary value.
+    const float3 colorDelta = abs(finalColor - opaqueColor);
+    float reactivity = ((GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX) ? max(colorDelta.x, max(colorDelta.y, colorDelta.z)) : length(colorDelta)) * GenReactiveScale();
+    if (GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)
+    {
+        reactivity = (reactivity < GenReactiveThreshold()) ? 0 : GenReactiveBinaryValue();
+    }
+    rw_output_autoreactive[uPixel] = reactivity;
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta
new file mode 100644
index 00000000..c55f004c
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_autogen_reactive_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 52cdb7a7c30cb614984908593ed19082
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl
new file mode 100644
index 00000000..93b73326
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl
@@ -0,0 +1,55 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR                     0
+
+#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC               0
+#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE        1
+#define FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5                  2
+#define FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE                   3
+
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                     0
+#define FSR3UPSCALER_BIND_CB_SPD                              1
+
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h"
+
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT
+void CS(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex)
+{
+    ComputeAutoExposure(WorkGroupId, LocalThreadIndex);    // Thin entry point: all work is delegated, given the thread group ID and the flattened thread index within the group
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta
new file mode 100644
index 00000000..508b43ec
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_compute_luminance_pyramid_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 2d149b52ba0f5bb468a94a71dbbcb66f
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl
new file mode 100644
index 00000000..70cc7ba0
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl
@@ -0,0 +1,67 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH      0
+#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS                1
+#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH                         2
+#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK                         3
+#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     4
+#define FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS       5
+#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS                  6
+#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR                           7
+#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH                           8
+#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE                        9
+// UAV binding indices for the depth-clip pass (the group above holds the SRV indices).
+#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS                0
+#define FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR                  1
+// Constant-buffer binding index.
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                                   0
+// Keep the includes after the binding macros above: presumably the callbacks header reads them (confirm).
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_sample.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_depth_clip.h"
+// Thread-group size defaults to 8x8x1; define any of these macros earlier to override it.
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+// Depth-clip entry point: only the dispatch thread ID is forwarded to DepthClip(); the other system-value inputs are unused here.
+FFX_PREFER_WAVE64
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(
+    int2 iGroupId : SV_GroupID,
+    int2 iDispatchThreadId : SV_DispatchThreadID,
+    int2 iGroupThreadId : SV_GroupThreadID,
+    int iGroupIndex : SV_GroupIndex)
+{
+    DepthClip(iDispatchThreadId);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta
new file mode 100644
index 00000000..cde3a5ef
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_depth_clip_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: da435b71cf57e2247b80ae0f0f86d1f8
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl
new file mode 100644
index 00000000..26b28f01
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl
@@ -0,0 +1,56 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA                       0
+// UAV binding indices for the lock pass (the single SRV index is defined above).
+#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS                             0
+#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      1
+// Constant-buffer binding index.
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                                   0
+// Keep the includes after the binding macros above: presumably the callbacks header reads them (confirm).
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_sample.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_lock.h"
+// Thread-group size defaults to 8x8x1; define any of these macros earlier to override it.
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+// Lock pass entry point: rebuilds the dispatch thread ID from the group ID and group-thread ID, then runs ComputeLock() on it.
+FFX_PREFER_WAVE64
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
+{
+    const uint2 uGroupSize = uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT);
+    uint2 uDispatchThreadId = uGroupId * uGroupSize + uGroupThreadId;
+    ComputeLock(uDispatchThreadId);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta
new file mode 100644
index 00000000..45c99dc4
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_lock_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 98d2cbbda5e90dd4ebd1d70abbb63a09
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl
new file mode 100644
index 00000000..bebdeb38
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl
@@ -0,0 +1,53 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE        0
+#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT            1
+// UAV binding index for the RCAS pass (the group above holds the SRV indices).
+#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT       0
+// Constant-buffer binding indices: the shared FSR3 upscaler buffer and the RCAS-specific one.
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                   0
+#define FSR3UPSCALER_BIND_CB_RCAS                   1
+// Keep the includes after the binding macros above: presumably the callbacks header reads them (confirm).
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_rcas.h"
+// Thread-group size defaults to 64x1x1; define any of these macros earlier to override it.
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 64
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+// RCAS entry point: passes the group-thread ID, group ID and dispatch thread ID straight through to RCAS().
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT
+void CS(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID, uint3 Dtid : SV_DispatchThreadID)
+{
+    RCAS(LocalThreadId, WorkGroupId, Dtid);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta
new file mode 100644
index 00000000..fb9bfe24
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_rcas_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 9a15fc73170a9bc478801c8fa4d8d574
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl
new file mode 100644
index 00000000..f277fd16
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl
@@ -0,0 +1,64 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS                  0
+#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH                           1
+#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR                           2
+#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE                        3
+// UAV binding indices for the reconstruct-previous-depth pass (the group above holds the SRV indices).
+#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      0
+#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS                1
+#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH                         2
+#define FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA                       3
+// Constant-buffer binding index.
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                           0
+// Keep the includes after the binding macros above: presumably the callbacks header reads them (confirm).
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_sample.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h"
+// Thread-group size defaults to 8x8x1; define any of these macros earlier to override it.
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+// Entry point: only the dispatch thread ID is forwarded to ReconstructAndDilate(); the other system-value inputs are unused here.
+FFX_PREFER_WAVE64
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(
+    int2 iGroupId : SV_GroupID,
+    int2 iDispatchThreadId : SV_DispatchThreadID,
+    int2 iGroupThreadId : SV_GroupThreadID,
+    int iGroupIndex : SV_GroupIndex
+)
+{
+    ReconstructAndDilate(iDispatchThreadId);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta
new file mode 100644
index 00000000..6489d6d1
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_reconstruct_previous_depth_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: bafb3726a76b97a49bb343d8a4323754
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl
new file mode 100644
index 00000000..6180885e
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl
@@ -0,0 +1,90 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY                     0
+#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR                           1
+#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS                  2
+#define FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR                  3
+#define FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR                 4
+#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK                         5
+#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     6
+// Each SRV above has its own index so that no two inputs can alias one binding; the UAV indices follow.
+#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE                          0
+#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION                       1
+#define FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR                  2
+#define FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR                 3
+// Constant-buffer binding indices: the shared FSR3 upscaler buffer and the auto-reactive one.
+#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER                           0
+#define FSR3UPSCALER_BIND_CB_AUTOREACTIVE                           1
+// Keep the includes after the binding macros above: presumably the callbacks header reads them (confirm).
+#include "fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_common.h"
+#include "fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h"
+// Thread-group size defaults to 8x8x1; define any of these macros earlier to override it.
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR3UPSCALER_NUM_THREADS
+#define FFX_FSR3UPSCALER_NUM_THREADS [numthreads(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH)]
+#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS
+// TCR autogen entry point: per pixel, computes reactive (.x) and transparency/composition (.y) values, merges them with the loaded masks, and stores them with the fetched colors.
+FFX_FSR3UPSCALER_NUM_THREADS
+FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT
+void CS(uint2 uGroupId : SV_GroupID, uint2 uGroupThreadId : SV_GroupThreadID)
+{
+    FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(uGroupId * uint2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId);
+
+    // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV).
+    // Offset the pixel-centre UV by the input motion vector and convert the result back to an integer index (iPrevIdx).
+    FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() );
+    FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) );
+    FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f);
+    // Fetch the pre-alpha (opaque-only) and post-alpha (input) color values for this pixel.
+    FFX_MIN16_F3 colorPreAlpha  = FFX_MIN16_F3( LoadOpaqueOnly(  uDispatchThreadId ) );
+    FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) );
+
+    FFX_MIN16_F2 outReactiveMask = 0;
+    // .y is computed first; .x is only computed when .y exceeds 0.5, then scaled by fReactiveScale and capped at fReactiveMax.
+    outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx);
+
+    if (outReactiveMask.y > 0.5f)
+    {
+        outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx);
+        outReactiveMask.x *= FFX_MIN16_F(fReactiveScale);
+        outReactiveMask.x = outReactiveMask.x < fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( fReactiveMax );
+    }
+
+    outReactiveMask.y *= FFX_MIN16_F(fTcScale  );
+    // Never go below the reactive and transparency/composition mask values loaded for this pixel.
+    outReactiveMask.x = max( outReactiveMask.x, FFX_MIN16_F( LoadReactiveMask(uDispatchThreadId) ) );
+    outReactiveMask.y = max( outReactiveMask.y, FFX_MIN16_F( LoadTransparencyAndCompositionMask(uDispatchThreadId) ) );
+
+    StoreAutoReactive(uDispatchThreadId, outReactiveMask);
+    // Write out the colors fetched above through StorePrevPreAlpha() / StorePrevPostAlpha().
+    StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha);
+    StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta
new file mode 100644
index 00000000..02c5f465
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/ffx_fsr3upscaler_tcr_autogen_pass.hlsl.meta
@@ -0,0 +1,7 @@
+fileFormatVersion: 2
+guid: 712d171118b59fc4fb28d0d487060d42
+ShaderIncludeImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler.meta
new file mode 100644
index 00000000..2626a2e2
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: ae9c6d015ae76544f9e8117e79ea862b
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h
new file mode 100644
index 00000000..f0b62ab3
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h
@@ -0,0 +1,616 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_COMMON_TYPES_H
+#define FFX_COMMON_TYPES_H
+
+#if defined(FFX_CPU)
+#define FFX_PARAMETER_IN
+#define FFX_PARAMETER_OUT
+#define FFX_PARAMETER_INOUT
+#define FFX_PARAMETER_UNIFORM
+#elif defined(FFX_HLSL)
+#define FFX_PARAMETER_IN        in
+#define FFX_PARAMETER_OUT       out
+#define FFX_PARAMETER_INOUT     inout
+#define FFX_PARAMETER_UNIFORM uniform
+#elif defined(FFX_GLSL)
+#define FFX_PARAMETER_IN        in
+#define FFX_PARAMETER_OUT       out
+#define FFX_PARAMETER_INOUT     inout
+#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found!
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_CPU)
+/// A typedef for a boolean value.
+///
+/// @ingroup CPUTypes
+typedef bool FfxBoolean;
+
+/// A typedef for an unsigned 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint8_t FfxUInt8;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint16_t FfxUInt16;
+
+/// A typedef for an unsigned 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32;
+
+/// A typedef for an unsigned 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef uint64_t FfxUInt64;
+
+/// A typedef for a signed 8bit integer.
+///
+/// @ingroup CPUTypes
+typedef int8_t FfxInt8;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup CPUTypes
+typedef int16_t FfxInt16;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup CPUTypes
+typedef int32_t FfxInt32;
+
+/// A typedef for a signed 64bit integer.
+///
+/// @ingroup CPUTypes
+typedef int64_t FfxInt64;
+
+/// A typedef for a floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x2[2];
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x3[3];
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup CPUTypes
+typedef float FfxFloat32x4[4];
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPUTypes
+typedef uint32_t FfxUInt32x4[4];
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_HLSL)
+
+#define FfxFloat32Mat4 matrix <float, 4, 4>
+#define FfxFloat32Mat3 matrix <float, 3, 3>
+
+/// A typedef for a boolean value.
+///
+/// @ingroup HLSLTypes
+typedef bool FfxBoolean;
+
+#if FFX_HLSL_SM>=62
+
+/// @defgroup HLSL62Types HLSL 6.2 And Above Types
+/// HLSL 6.2 and above type defines for all commonly used variables
+/// 
+/// @ingroup HLSLTypes
+
+/// A typedef for a floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t   FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t2  FfxFloat32x2;
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t3  FfxFloat32x3;
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup HLSL62Types
+typedef float32_t4  FfxFloat32x4;
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+typedef float4x4 FfxFloat32x4x4;
+typedef float3x3 FfxFloat32x3x3;
+typedef float2x2 FfxFloat32x2x2;
+
+/// A typedef for an unsigned 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t    FfxUInt32;
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t2   FfxUInt32x2;
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t3   FfxUInt32x3;
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup HLSL62Types
+typedef uint32_t4   FfxUInt32x4;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t     FfxInt32;
+
+/// A typedef for a 2-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t2    FfxInt32x2;
+
+/// A typedef for a 3-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t3    FfxInt32x3;
+
+/// A typedef for a 4-dimensional signed 32bit integer.
+///
+/// @ingroup HLSL62Types
+typedef int32_t4    FfxInt32x4;
+
+#else // #if FFX_HLSL_SM>=62
+
+/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types
+/// HLSL 6.1 and below type defines for all commonly used variables
+/// 
+/// @ingroup HLSLTypes
+
+#define FfxFloat32   float
+#define FfxFloat32x2 float2
+#define FfxFloat32x3 float3
+#define FfxFloat32x4 float4
+
+/// A [cacao_placeholder] typedef for matrix type until confirmed.
+#define FfxFloat32x4x4 float4x4
+#define FfxFloat32x3x3 float3x3
+#define FfxFloat32x2x2 float2x2
+
+/// A typedef for an unsigned 32bit integer.
+///
+/// @ingroup GPU
+typedef uint        FfxUInt32;
+typedef uint2       FfxUInt32x2;
+typedef uint3       FfxUInt32x3;
+typedef uint4       FfxUInt32x4;
+
+typedef int         FfxInt32;
+typedef int2        FfxInt32x2;
+typedef int3        FfxInt32x3;
+typedef int4        FfxInt32x4;
+
+#endif // #if FFX_HLSL_SM>=62
+
+#if FFX_HALF
+
+#if FFX_HLSL_SM >= 62
+
+typedef float16_t   FfxFloat16;
+typedef float16_t2  FfxFloat16x2;
+typedef float16_t3  FfxFloat16x3;
+typedef float16_t4  FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef uint16_t    FfxUInt16;
+typedef uint16_t2   FfxUInt16x2;
+typedef uint16_t3   FfxUInt16x3;
+typedef uint16_t4   FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef int16_t     FfxInt16;
+typedef int16_t2    FfxInt16x2;
+typedef int16_t3    FfxInt16x3;
+typedef int16_t4    FfxInt16x4;
+#elif SHADER_API_PSSL
+#pragma argument(realtypes)     // Enable true 16-bit types
+
+typedef half        FfxFloat16;
+typedef half2       FfxFloat16x2;
+typedef half3       FfxFloat16x3;
+typedef half4       FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup GPU
+typedef ushort      FfxUInt16;
+typedef ushort2     FfxUInt16x2;
+typedef ushort3     FfxUInt16x3;
+typedef ushort4     FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup GPU
+typedef short       FfxInt16;
+typedef short2      FfxInt16x2;
+typedef short3      FfxInt16x3;
+typedef short4      FfxInt16x4;
+#else // #if FFX_HLSL_SM>=62
+typedef min16float  FfxFloat16;
+typedef min16float2 FfxFloat16x2;
+typedef min16float3 FfxFloat16x3;
+typedef min16float4 FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef min16uint   FfxUInt16;
+typedef min16uint2  FfxUInt16x2;
+typedef min16uint3  FfxUInt16x3;
+typedef min16uint4  FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup HLSLTypes
+typedef min16int    FfxInt16;
+typedef min16int2   FfxInt16x2;
+typedef min16int3   FfxInt16x3;
+typedef min16int4   FfxInt16x4;
+#endif  // #if FFX_HLSL_SM>=62
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_HLSL)
+
+#if defined(FFX_GLSL)
+
+#define FfxFloat32Mat4 mat4
+#define FfxFloat32Mat3 mat3
+
+/// Preprocessor aliases mapping the boolean and 32-bit Ffx type names onto GLSL built-in types.
+///
+/// @ingroup GLSLTypes
+#define FfxBoolean   bool
+#define FfxFloat32   float
+#define FfxFloat32x2 vec2
+#define FfxFloat32x3 vec3
+#define FfxFloat32x4 vec4
+#define FfxUInt32    uint
+#define FfxUInt32x2  uvec2
+#define FfxUInt32x3  uvec3
+#define FfxUInt32x4  uvec4
+#define FfxInt32     int
+#define FfxInt32x2   ivec2
+#define FfxInt32x3   ivec3
+#define FfxInt32x4   ivec4
+
+/// Square-matrix aliases; flagged upstream as a [cacao_placeholder] until the matrix type is confirmed.
+#define FfxFloat32x4x4 mat4
+#define FfxFloat32x3x3 mat3
+#define FfxFloat32x2x2 mat2
+
+#if FFX_HALF
+#define FfxFloat16   float16_t
+#define FfxFloat16x2 f16vec2
+#define FfxFloat16x3 f16vec3
+#define FfxFloat16x4 f16vec4
+#define FfxUInt16    uint16_t
+#define FfxUInt16x2  u16vec2
+#define FfxUInt16x3  u16vec3
+#define FfxUInt16x4  u16vec4
+#define FfxInt16     int16_t
+#define FfxInt16x2   i16vec2
+#define FfxInt16x3   i16vec3
+#define FfxInt16x4   i16vec4
+#endif // FFX_HALF
+#endif // #if defined(FFX_GLSL)
+
+// Global toggles that select which variant of the typedef-generating macros below is defined:
+// #define FFX_HALF            (1)
+// #define FFX_HLSL_SM         (62)
+
+#if FFX_HALF && !defined(SHADER_API_PSSL)
+
+#if FFX_HLSL_SM >= 62
+// FFX_HLSL_SM >= 62: the MIN16 and 16BIT families both typedef <base>16_t types.
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#else // FFX_HLSL_SM < 62
+// MIN16 typedefs min16<base> types here, and the 16BIT family simply forwards to MIN16.
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef min16##BaseComponentType TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<min16##BaseComponentType, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           FFX_MIN16_SCALAR( TypeName, BaseComponentType );
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
+
+#endif //FFX_HLSL_SM>=62
+
+#else // !(FFX_HALF && !defined(SHADER_API_PSSL))
+// No 16-bit path selected: every macro typedefs the unmodified base component type.
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#endif // FFX_HALF && !defined(SHADER_API_PSSL)
+
+#if defined(FFX_GPU)
+// Common typedefs (HLSL, non-PSSL): instantiate the FFX_MIN16_* and FFX_*16_t types via the generator macros.
+#if defined(FFX_HLSL) && !defined(SHADER_API_PSSL)
+FFX_MIN16_SCALAR( FFX_MIN16_F , float );
+FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 );
+
+FFX_MIN16_SCALAR( FFX_MIN16_I,  int );
+FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 );
+
+FFX_MIN16_SCALAR( FFX_MIN16_U,  uint );
+FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 );
+
+FFX_16BIT_SCALAR( FFX_F16_t , float );
+FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 );
+FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 );
+FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 );
+
+FFX_16BIT_SCALAR( FFX_I16_t,  int );
+FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 );
+FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 );
+FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 );
+
+FFX_16BIT_SCALAR( FFX_U16_t,  uint );
+FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 );
+FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 );
+FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 );
+// Declares Prefix_F..Prefix_U4 as aliases of the FFX_MIN16_* typedefs above.
+#define TYPEDEF_MIN16_TYPES(Prefix)           \
+typedef FFX_MIN16_F     Prefix##_F;           \
+typedef FFX_MIN16_F2    Prefix##_F2;          \
+typedef FFX_MIN16_F3    Prefix##_F3;          \
+typedef FFX_MIN16_F4    Prefix##_F4;          \
+typedef FFX_MIN16_I     Prefix##_I;           \
+typedef FFX_MIN16_I2    Prefix##_I2;          \
+typedef FFX_MIN16_I3    Prefix##_I3;          \
+typedef FFX_MIN16_I4    Prefix##_I4;          \
+typedef FFX_MIN16_U     Prefix##_U;           \
+typedef FFX_MIN16_U2    Prefix##_U2;          \
+typedef FFX_MIN16_U3    Prefix##_U3;          \
+typedef FFX_MIN16_U4    Prefix##_U4;
+// Same for the 16BIT family; aliases the FFX_F16_t/FFX_I16_t/FFX_U16_t typedefs this branch declares.
+#define TYPEDEF_16BIT_TYPES(Prefix)           \
+typedef FFX_F16_t       Prefix##_F;           \
+typedef FFX_F16_t2      Prefix##_F2;          \
+typedef FFX_F16_t3      Prefix##_F3;          \
+typedef FFX_F16_t4      Prefix##_F4;          \
+typedef FFX_I16_t       Prefix##_I;           \
+typedef FFX_I16_t2      Prefix##_I2;          \
+typedef FFX_I16_t3      Prefix##_I3;          \
+typedef FFX_I16_t4      Prefix##_I4;          \
+typedef FFX_U16_t       Prefix##_U;           \
+typedef FFX_U16_t2      Prefix##_U2;          \
+typedef FFX_U16_t3      Prefix##_U3;          \
+typedef FFX_U16_t4      Prefix##_U4;
+// Same naming scheme, but aliasing the full 32-bit FfxFloat32/FfxInt32/FfxUInt32 types.
+#define TYPEDEF_FULL_PRECISION_TYPES(Prefix)  \
+typedef FfxFloat32      Prefix##_F;           \
+typedef FfxFloat32x2    Prefix##_F2;          \
+typedef FfxFloat32x3    Prefix##_F3;          \
+typedef FfxFloat32x4    Prefix##_F4;          \
+typedef FfxInt32        Prefix##_I;           \
+typedef FfxInt32x2      Prefix##_I2;          \
+typedef FfxInt32x3      Prefix##_I3;          \
+typedef FfxInt32x4      Prefix##_I4;          \
+typedef FfxUInt32       Prefix##_U;           \
+typedef FfxUInt32x2     Prefix##_U2;          \
+typedef FfxUInt32x3     Prefix##_U3;          \
+typedef FfxUInt32x4     Prefix##_U4;
+#endif // #if defined(FFX_HLSL) && !defined(SHADER_API_PSSL)
+
+#if defined(SHADER_API_PSSL)
+// Both keywords are defined to nothing under PSSL, so any use of them expands away.
+#define unorm
+#define globallycoherent
+
+#if FFX_HALF
+// FFX_HALF: the MIN16 and 16BIT alias families both map to half/short/ushort types.
+#define  FFX_MIN16_F  half
+#define  FFX_MIN16_F2 half2
+#define  FFX_MIN16_F3 half3
+#define  FFX_MIN16_F4 half4
+
+#define  FFX_MIN16_I  short
+#define  FFX_MIN16_I2 short2
+#define  FFX_MIN16_I3 short3
+#define  FFX_MIN16_I4 short4
+
+#define  FFX_MIN16_U  ushort
+#define  FFX_MIN16_U2 ushort2
+#define  FFX_MIN16_U3 ushort3
+#define  FFX_MIN16_U4 ushort4
+
+#define FFX_16BIT_F  half
+#define FFX_16BIT_F2 half2
+#define FFX_16BIT_F3 half3
+#define FFX_16BIT_F4 half4
+
+#define FFX_16BIT_I  short
+#define FFX_16BIT_I2 short2
+#define FFX_16BIT_I3 short3
+#define FFX_16BIT_I4 short4
+
+#define FFX_16BIT_U  ushort
+#define FFX_16BIT_U2 ushort2
+#define FFX_16BIT_U3 ushort3
+#define FFX_16BIT_U4 ushort4
+
+#else // FFX_HALF
+// No FFX_HALF: the same alias names map to the 32-bit float/int/uint types.
+#define  FFX_MIN16_F  float
+#define  FFX_MIN16_F2 float2
+#define  FFX_MIN16_F3 float3
+#define  FFX_MIN16_F4 float4
+
+#define  FFX_MIN16_I  int
+#define  FFX_MIN16_I2 int2
+#define  FFX_MIN16_I3 int3
+#define  FFX_MIN16_I4 int4
+
+#define  FFX_MIN16_U  uint
+#define  FFX_MIN16_U2 uint2
+#define  FFX_MIN16_U3 uint3
+#define  FFX_MIN16_U4 uint4
+
+#define FFX_16BIT_F  float
+#define FFX_16BIT_F2 float2
+#define FFX_16BIT_F3 float3
+#define FFX_16BIT_F4 float4
+
+#define FFX_16BIT_I  int
+#define FFX_16BIT_I2 int2
+#define FFX_16BIT_I3 int3
+#define FFX_16BIT_I4 int4
+
+#define FFX_16BIT_U  uint
+#define FFX_16BIT_U2 uint2
+#define FFX_16BIT_U3 uint3
+#define FFX_16BIT_U4 uint4
+
+#endif // FFX_HALF
+
+#endif  // #if defined(SHADER_API_PSSL)
+
+#if defined(FFX_GLSL)
+
+#if FFX_HALF
+// FFX_HALF: the MIN16 and 16BIT alias families both map to the GLSL 16-bit types.
+#define  FFX_MIN16_F  float16_t
+#define  FFX_MIN16_F2 f16vec2
+#define  FFX_MIN16_F3 f16vec3
+#define  FFX_MIN16_F4 f16vec4
+
+#define  FFX_MIN16_I  int16_t
+#define  FFX_MIN16_I2 i16vec2
+#define  FFX_MIN16_I3 i16vec3
+#define  FFX_MIN16_I4 i16vec4
+
+#define  FFX_MIN16_U  uint16_t
+#define  FFX_MIN16_U2 u16vec2
+#define  FFX_MIN16_U3 u16vec3
+#define  FFX_MIN16_U4 u16vec4
+
+#define FFX_16BIT_F  float16_t
+#define FFX_16BIT_F2 f16vec2
+#define FFX_16BIT_F3 f16vec3
+#define FFX_16BIT_F4 f16vec4
+
+#define FFX_16BIT_I  int16_t
+#define FFX_16BIT_I2 i16vec2
+#define FFX_16BIT_I3 i16vec3
+#define FFX_16BIT_I4 i16vec4
+
+#define FFX_16BIT_U  uint16_t
+#define FFX_16BIT_U2 u16vec2
+#define FFX_16BIT_U3 u16vec3
+#define FFX_16BIT_U4 u16vec4
+
+#else // FFX_HALF
+// No FFX_HALF: the same alias names map to the 32-bit float/int/uint GLSL types.
+#define  FFX_MIN16_F  float
+#define  FFX_MIN16_F2 vec2
+#define  FFX_MIN16_F3 vec3
+#define  FFX_MIN16_F4 vec4
+
+#define  FFX_MIN16_I  int
+#define  FFX_MIN16_I2 ivec2
+#define  FFX_MIN16_I3 ivec3
+#define  FFX_MIN16_I4 ivec4
+
+#define  FFX_MIN16_U  uint
+#define  FFX_MIN16_U2 uvec2
+#define  FFX_MIN16_U3 uvec3
+#define  FFX_MIN16_U4 uvec4
+
+#define FFX_16BIT_F  float
+#define FFX_16BIT_F2 vec2
+#define FFX_16BIT_F3 vec3
+#define FFX_16BIT_F4 vec4
+
+#define FFX_16BIT_I  int
+#define FFX_16BIT_I2 ivec2
+#define FFX_16BIT_I3 ivec3
+#define FFX_16BIT_I4 ivec4
+
+#define FFX_16BIT_U  uint
+#define FFX_16BIT_U2 uvec2
+#define FFX_16BIT_U3 uvec3
+#define FFX_16BIT_U4 uvec4
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_GLSL)
+
+#endif // #if defined(FFX_GPU)
+#endif // #ifndef FFX_COMMON_TYPES_H
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta
new file mode 100644
index 00000000..8d9b15ff
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_common_types.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 7974b728d5c1b6d4a8a8e3965d03f96d
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h
new file mode 100644
index 00000000..02f6b3f6
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h
@@ -0,0 +1,80 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// @defgroup FfxGPU GPU
+/// The FidelityFX SDK GPU References
+/// 
+/// @ingroup ffxSDK
+
+/// @defgroup FfxHLSL HLSL References
+/// FidelityFX SDK HLSL GPU References
+/// 
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGLSL GLSL References
+/// FidelityFX SDK GLSL GPU References
+/// 
+/// @ingroup FfxGPU
+
+/// @defgroup FfxGPUEffects FidelityFX GPU References
+/// FidelityFX Effect GPU Reference Documentation
+/// 
+/// @ingroup FfxGPU
+
+/// @defgroup GPUCore GPU Core
+/// GPU defines and functions
+/// 
+/// @ingroup FfxGPU
+
+#if !defined(FFX_CORE_H)
+#define FFX_CORE_H
+
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif  //__hlsl_dx_compiler
+
+#include "ffx_common_types.h"
+
+#if defined(FFX_CPU)
+    #include "ffx_core_cpu.h"
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_GLSL) && defined(FFX_GPU)
+    #include "ffx_core_glsl.h"
+#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
+
+#if defined(FFX_HLSL) && defined(FFX_GPU)
+    #include "ffx_core_hlsl.h"
+#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+    #include "ffx_core_gpu_common.h"
+    #include "ffx_core_gpu_common_half.h"
+    #include "ffx_core_portability.h"
+#endif // #if defined(FFX_GPU)
+
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif  //__hlsl_dx_compiler
+
+#endif // #if !defined(FFX_CORE_H)
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h.meta
new file mode 100644
index 00000000..562741a2
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: b91c5f52b89ff554dacb51045a802ed8
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h
new file mode 100644
index 00000000..865258d4
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h
@@ -0,0 +1,338 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// A define for a true value in a boolean expression (the integer 1 in this CPU header).
+///
+/// @ingroup CPUTypes
+#define FFX_TRUE (1)
+
+/// A define for a false value in a boolean expression (the integer 0 in this CPU header).
+///
+/// @ingroup CPUTypes
+#define FFX_FALSE (0)
+
+#if !defined(FFX_STATIC)
+/// A define to abstract declaration of static variables and functions.
+/// Only defined here when the includer has not already provided FFX_STATIC.
+/// @ingroup CPUTypes
+#define FFX_STATIC static
+#endif // #if !defined(FFX_STATIC)
+
+/// @defgroup CPUCore CPU Core
+/// Core CPU-side defines and functions
+/// 
+/// @ingroup ffxHost
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+
+/// Reinterpret the bits of a 32bit IEEE-754 floating point value as an unsigned integer.
+///
+/// @param [in] x               A 32bit floating value.
+///
+/// @returns
+/// An unsigned 32bit integer whose bit pattern is that of <c><i>x</i></c>.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    // Type-pun through a union: store the float member, read back the integer member.
+    union
+    {
+        FfxUInt32  asUInt;
+        FfxFloat32 asFloat;
+    } pun;
+    pun.asFloat = x;
+    return pun.asUInt;
+}
+
+/// Dot product of two 2-component vectors (sum of the per-component products).
+FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) {
+    return a[0] * b[0] + a[1] * b[1];
+}
+
+/// Dot product of two 3-component vectors (sum of the per-component products).
+FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) {
+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+/// Dot product of two 4-component vectors (sum of the per-component products).
+FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) {
+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// CPU-side equivalent of the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math, evaluated here as <c>y * t + (-x * t + x)</c>:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    return y * t + (-x * t + x);
+}
+
+/// Compute the reciprocal of a value.
+///
+/// @param [in] x               The value to compute the reciprocal for (not checked against zero).
+///
+/// @returns
+/// The reciprocal value of <c><i>x</i></c>, computed as <c>1.0f / x</c>.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x)
+{
+    return 1.0f / x;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+    return sqrt(x);
+}
+
+/// Right-shift <c>a</c> by <c>b</c> with both reinterpreted as signed 32bit integers; result returned as unsigned.
+FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) {
+    return (FfxUInt32)((FfxInt32)a >> (FfxInt32)b);
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// @param [in] x               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal of the square root of <c><i>x</i></c>, via <c>ffxSqrt</c> then <c>ffxReciprocal</c>.
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 x)
+{
+    const FfxFloat32 root = ffxSqrt(x);
+    return ffxReciprocal(root);
+}
+
+/// Smaller of two floats; yields <c>y</c> whenever <c>x &lt; y</c> does not hold.
+FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) {
+    return y > x ? x : y;
+}
+
+/// Smaller of two unsigned 32bit integers.
+FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) {
+    return y > x ? x : y;
+}
+
+/// Larger of two floats; yields <c>y</c> whenever <c>x &gt; y</c> does not hold.
+FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) {
+    return y < x ? x : y;
+}
+
+/// Larger of two unsigned 32bit integers.
+FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) {
+    return y < x ? x : y;
+}
+
+/// Clamp a value to a [0..1] range.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// <c><i>x</i></c> after applying <c>ffxMax</c> against 0 and then <c>ffxMin</c> against 1.
+/// @ingroup CPUCore
+FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    const FfxFloat32 atLeastZero = ffxMax(0.0f, x);
+    return ffxMin(1.0f, atLeastZero);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Writes <c>a[i] + b</c> into <c>d[i]</c> for each of the three components.
+FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    for (int i = 0; i < 3; ++i) {
+        d[i] = a[i] + b;
+    }
+}
+
+/// Copies the three components of <c>a</c> into <c>d</c>.
+FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    for (int i = 0; i < 3; ++i) {
+        d[i] = a[i];
+    }
+}
+
+/// Writes the component-wise product <c>a[i] * b[i]</c> into <c>d[i]</c>.
+FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    for (int i = 0; i < 3; ++i) {
+        d[i] = a[i] * b[i];
+    }
+}
+
+/// Writes <c>a[i] * b</c> into <c>d[i]</c> for each of the three components.
+FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    for (int i = 0; i < 3; ++i) {
+        d[i] = a[i] * b;
+    }
+}
+
+/// Writes <c>ffxReciprocal(a[i])</c> into <c>d[i]</c> for each of the three components.
+FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    for (int i = 0; i < 3; ++i) {
+        d[i] = ffxReciprocal(a[i]);
+    }
+}
+
+/// Convert FfxFloat32 to half (in lower 16-bits of output).
+///
+/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+/// Two 512-entry tables are indexed by the top 9 bits of <c><i>f</i></c> (sign and exponent).
+/// The function supports denormals.
+///
+/// Some conversion rules are to make computations possibly "safer" on the GPU,
+///  -INF & -NaN -> -65504
+///  +INF & +NaN -> +65504
+///
+/// @param [in] f               The 32bit floating point value to convert.
+///
+/// @returns
+/// The 16bit floating point encoding of <c><i>f</i></c>; mantissa bits are shifted out (truncated), not rounded.
+///
+/// @ingroup CPUCore
+FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
+{
+    static const FfxUInt16 base[512] = {  // read-only: half sign/exponent bits per table index
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
+        0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
+        0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
+        0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+        0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
+    };
+    // Right shift applied to the 23-bit float mantissa before it is added to base[i]; also read-only.
+    static const FfxUInt8 shift[512] = {
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+    };
+
+    union
+    {
+        FfxFloat32 f;
+        FfxUInt32  u;
+    } bits;
+    // Reinterpret the float as raw bits via the union.
+    bits.f       = f;
+    FfxUInt32 u = bits.u;
+    FfxUInt32 i = u >> 23;  // top 9 bits (sign + 8-bit exponent) select the table entry
+    return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]);
+}
+
+/// Pack 2x32-bit floating point values in a single 32bit value.
+///
+/// Each component of <c><i>x</i></c> is converted with <c>f32tof16</c>; the X result is placed in the
+/// lower 16 bits and the Y result in the upper 16 bits of the returned 32bit unsigned integer.
+///
+/// @param [in] x               A 2-dimensional floating point value to convert and pack.
+///
+/// @returns
+/// A packed 32bit value containing 2 16bit floating point values.
+/// @ingroup CPUCore
+FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 x)
+{
+    const FfxUInt32 lowHalf  = f32tof16(x[0]);
+    const FfxUInt32 highHalf = f32tof16(x[1]) << 16;
+    return lowHalf + highHalf;
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta
new file mode 100644
index 00000000..9aed8740
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_cpu.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 4c88c0b7a4dec1e479272449c19ca981
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h
new file mode 100644
index 00000000..2f687df9
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h
@@ -0,0 +1,2784 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// A define for a true value in a boolean expression.
+///
+/// @ingroup GPUCore
+#define FFX_TRUE (true)
+
+/// A define for a false value in a boolean expression.
+///
+/// @ingroup GPUCore
+#define FFX_FALSE (false)
+
+/// A define value for positive infinity.
+///
+/// @ingroup GPUCore
+#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u)
+
+/// A define value for negative infinity.
+///
+/// @ingroup GPUCore
+#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u)
+
+/// A define value for PI.
+/// 
+/// @ingroup GPUCore
+#define FFX_PI  (3.14159)
+
+FFX_STATIC const FfxFloat32 FFX_FP16_MIN        = 6.10e-05f;
+FFX_STATIC const FfxFloat32 FFX_FP16_MAX        = 65504.0f;
+FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX;
+
+/// Compute the reciprocal of <c><i>value</i></c>.
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of <c><i>value</i></c> (1 / <c><i>value</i></c>), as evaluated by <c>rcp</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxReciprocal(FfxFloat32 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c> (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of <c><i>value</i></c> (1 / <c><i>value</i></c>), as evaluated by <c>rcp</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c> (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of <c><i>value</i></c> (1 / <c><i>value</i></c>), as evaluated by <c>rcp</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c> (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The reciprocal of <c><i>value</i></c> (1 / <c><i>value</i></c>), as evaluated by <c>rcp</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
+{
+    return rcp(value);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxInt32</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxInt32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxInt32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxInt32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxUInt32</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxUInt32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxUInt32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values (<c>FfxUInt32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxInt32</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxInt32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxInt32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxInt32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxUInt32</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxUInt32x2</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxUInt32x3</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values (<c>FfxUInt32x4</c> overload).
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// <c><i>x</i></c> raised to the power <c><i>y</i></c>, as evaluated by <c>pow</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// <c><i>x</i></c> raised to the power <c><i>y</i></c>, as evaluated by <c>pow</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// <c><i>x</i></c> raised to the power <c><i>y</i></c>, as evaluated by <c>pow</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// <c><i>x</i></c> raised to the power <c><i>y</i></c>, as evaluated by <c>pow</c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
+{
+    return sqrt(x);
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into; the bit is OR'ed in, so 'd' must be positive.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit of <c><i>s</i></c> OR'ed into it.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (<c>FfxFloat32x2</c> overload).
+///
+/// @param [in] d                   The value to copy the sign bit into; the bit is OR'ed in, so 'd' must be positive.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit of <c><i>s</i></c> OR'ed into it.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (<c>FfxFloat32x3</c> overload).
+///
+/// @param [in] d                   The value to copy the sign bit into; the bit is OR'ed in, so 'd' must be positive.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit of <c><i>s</i></c> OR'ed into it.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd' (<c>FfxFloat32x4</c> overload).
+///
+/// @param [in] d                   The value to copy the sign bit into; the bit is OR'ed in, so 'd' must be positive.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit of <c><i>s</i></c> OR'ed into it.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u)));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxIsSigned(FfxFloat32 m)
+{
+    return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x2</c> overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
+{
+    return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x3</c> overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
+{
+    return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x4</c> overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0, positive or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
+{
+    return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
+{
+    return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x2</c> overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
+{
+    return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x3</c> overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
+{
+    return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation to return the following (<c>FfxFloat32x4</c> overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0, negative or NaN.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
+{
+    return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Convert the bit pattern of a 32bit floating point value to a sortable integer.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+///
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] value                       The bit pattern of the floating point value to make sortable.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
+{
+    return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+}
+
+/// Convert a sortable integer back to the bit pattern of a 32bit floating point value.
+///
+/// This undoes the encoding applied by ffxFloatToSortableInteger:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] value                       The sortable integer value to convert back.
+///
+/// @returns
+/// The bit pattern of the floating point value, as a 32bit unsigned integer.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
+{
+    return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+}
+
+/// Calculate a low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateSqrt(FfxFloat32 value)
+{
+    return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value)
+{
+    return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value (initial estimate refined by one Newton-Raphson step).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
+{
+    FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value));
+    return b * (-b * value + FfxFloat32(2.0));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value)
+{
+    return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1)));
+}
+
+/// Calculate a low-quality approximation for the square root of a value (<c>FfxFloat32x2</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value)
+{
+    return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value (<c>FfxFloat32x2</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value)
+{
+    return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value (initial estimate refined by one Newton-Raphson step).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value)
+{
+    FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value));
+    return b * (-b * value + ffxBroadcast2(2.0f));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value (<c>FfxFloat32x2</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value)
+{
+    return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u)));
+}
+
+/// Calculate a low-quality approximation for the square root of a value (<c>FfxFloat32x3</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value)
+{
+    return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value (<c>FfxFloat32x3</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value)
+{
+    return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value (initial estimate refined by one Newton-Raphson step).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value)
+{
+    FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value));
+    return b * (-b * value + ffxBroadcast3(2.0f));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value (<c>FfxFloat32x3</c> overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximation of the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value)
+{
+    return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u)));
+}
+
+/// Estimate the square root of each component of a 4-component value to low quality.
+///
+/// The estimate is the result of ffxAsUInt32(value) shifted right by one, plus a magic constant.
+/// More on this family of approximations can be found in Michal Drobot's material:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value whose square root is to be approximated.
+///
+/// @returns A low-quality estimate of the square root of <c><i>value</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value)
+{
+    FfxUInt32x4 halvedBits = ffxAsUInt32(value) >> ffxBroadcast4(1u);
+    return ffxAsFloat(halvedBits + ffxBroadcast4(0x1fbc4639u));
+}
+
+/// Estimate the reciprocal of each component of a 4-component value to low quality.
+///
+/// The estimate is a magic constant minus the result of ffxAsUInt32(value), with no refinement.
+/// More on this family of approximations can be found in Michal Drobot's material:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value whose reciprocal is to be approximated.
+///
+/// @returns A low-quality estimate of the reciprocal of <c><i>value</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value)
+{
+    FfxUInt32x4 valueBits = ffxAsUInt32(value);
+    return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - valueBits);
+}
+
+/// Estimate the reciprocal of each component of a 4-component value to medium quality.
+///
+/// A bit-level first guess is refined by one step of the form guess * (2 - guess * value).
+/// More on this family of approximations can be found in Michal Drobot's material:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value whose reciprocal is to be approximated.
+///
+/// @returns A medium-quality estimate of the reciprocal of <c><i>value</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value)
+{
+    FfxFloat32x4 guess      = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value));
+    FfxFloat32x4 correction = -guess * value + ffxBroadcast4(2.0f);
+    return guess * correction;
+}
+
+/// Estimate the reciprocal square root of each component of a 4-component value to low quality.
+///
+/// The estimate is a magic constant minus the result of ffxAsUInt32(value) shifted right by one.
+/// More on this family of approximations can be found in Michal Drobot's material:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value whose reciprocal square root is to be approximated.
+///
+/// @returns A low-quality estimate of the reciprocal square root of <c><i>value</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value)
+{
+    FfxUInt32x4 halvedBits = ffxAsUInt32(value) >> ffxBroadcast4(1u);
+    return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - halvedBits);
+}
+
+/// Compute the dot product of two 2-component vectors.
+///
+/// @param [in] a                   The left-hand vector operand.
+/// @param [in] b                   The right-hand vector operand.
+///
+/// @returns The scalar <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
+{
+    FfxFloat32 product = dot(a, b);
+    return product;
+}
+
+/// Compute the dot product of two 3-component vectors.
+///
+/// @param [in] a                   The left-hand vector operand.
+/// @param [in] b                   The right-hand vector operand.
+///
+/// @returns The scalar <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    FfxFloat32 product = dot(a, b);
+    return product;
+}
+
+/// Compute the dot product of two 4-component vectors.
+///
+/// @param [in] a                   The left-hand vector operand.
+/// @param [in] b                   The right-hand vector operand.
+///
+/// @returns The scalar <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
+{
+    FfxFloat32 product = dot(a, b);
+    return product;
+}
+
+
+/// Convert a scalar value from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to Gamma2 amounts to a 4th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into Gamma2.
+///
+/// @returns The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
+{
+    FfxFloat32 cubed = a * a * a;
+    return cubed * a;
+}
+
+/// Convert a scalar value from PQ to linear space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to linear amounts to an 8th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into linear.
+///
+/// @returns The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
+{
+    FfxFloat32 fourthPower = a * a * a * a;
+    return fourthPower * a * a * a * a;
+}
+
+/// Convert a scalar value from Gamma2 to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 2 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
+{
+    FfxUInt32 shiftedBits = ffxAsUInt32(a) >> FfxUInt32(2);
+    return ffxAsFloat(shiftedBits + FfxUInt32(0x2F9A4E46));
+}
+
+/// Convert a scalar value from Gamma2 to PQ space using a refined fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^4 - a) / (4 * b^4).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
+{
+    FfxFloat32 guess  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
+    FfxFloat32 guess4 = guess * guess * guess * guess;
+    FfxFloat32 step   = guess * (guess4 - a) / (FfxFloat32(4.0) * guess4);
+    return guess - step;
+}
+
+/// Convert a scalar value from Gamma2 to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// evaluated here as two nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
+{
+    FfxFloat32 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Convert a scalar value from linear to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 3 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
+{
+    FfxUInt32 shiftedBits = ffxAsUInt32(a) >> FfxUInt32(3);
+    return ffxAsFloat(shiftedBits + FfxUInt32(0x378D8723));
+}
+
+/// Convert a scalar value from linear to PQ space using a refined fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^8 - a) / (8 * b^8).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
+{
+    FfxFloat32 guess  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
+    FfxFloat32 guess8 = guess * guess * guess * guess * guess * guess * guess * guess;
+    FfxFloat32 step   = guess * (guess8 - a) / (FfxFloat32(8.0) * guess8);
+    return guess - step;
+}
+
+/// Convert a scalar value from linear to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// evaluated here as three nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
+{
+    FfxFloat32 fourthRoot = ffxSqrt(ffxSqrt(a));
+    return ffxSqrt(fourthRoot);
+}
+
+/// Convert each component of a 2-component vector from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to Gamma2 amounts to a 4th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into Gamma2.
+///
+/// @returns The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 cubed = a * a * a;
+    return cubed * a;
+}
+
+/// Convert each component of a 2-component vector from PQ to linear space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to linear amounts to an 8th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into linear.
+///
+/// @returns The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
+{
+    FfxFloat32x2 fourthPower = a * a * a * a;
+    return fourthPower * a * a * a * a;
+}
+
+/// Convert each component of a 2-component vector from Gamma2 to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 2 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
+{
+    FfxUInt32x2 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast2(2u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast2(0x2F9A4E46u));
+}
+
+/// Convert each component of a 2-component vector from Gamma2 to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^4 - a) / (4 * b^4).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 guess  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
+    FfxFloat32x2 guess4 = guess * guess * guess * guess;
+    FfxFloat32x2 step   = guess * (guess4 - a) / (FfxFloat32(4.0) * guess4);
+    return guess - step;
+}
+
+/// Convert each component of a 2-component vector from Gamma2 to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// evaluated here as two nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
+{
+    FfxFloat32x2 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Convert each component of a 2-component vector from linear to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 3 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
+{
+    FfxUInt32x2 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast2(3u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast2(0x378D8723u));
+}
+
+/// Convert each component of a 2-component vector from linear to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^8 - a) / (8 * b^8).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 guess  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
+    FfxFloat32x2 guess8 = guess * guess * guess * guess * guess * guess * guess * guess;
+    FfxFloat32x2 step   = guess * (guess8 - a) / (FfxFloat32(8.0) * guess8);
+    return guess - step;
+}
+
+/// Convert each component of a 2-component vector from linear to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// evaluated here as three nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
+{
+    FfxFloat32x2 fourthRoot = ffxSqrt(ffxSqrt(a));
+    return ffxSqrt(fourthRoot);
+}
+
+/// Convert each component of a 3-component vector from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to Gamma2 amounts to a 4th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into Gamma2.
+///
+/// @returns The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 cubed = a * a * a;
+    return cubed * a;
+}
+
+/// Convert each component of a 3-component vector from PQ to linear space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to linear amounts to an 8th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into linear.
+///
+/// @returns The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
+{
+    FfxFloat32x3 fourthPower = a * a * a * a;
+    return fourthPower * a * a * a * a;
+}
+
+/// Convert each component of a 3-component vector from Gamma2 to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 2 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
+{
+    FfxUInt32x3 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast3(2u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast3(0x2F9A4E46u));
+}
+
+/// Convert each component of a 3-component vector from Gamma2 to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^4 - a) / (4 * b^4).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 guess  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
+    FfxFloat32x3 guess4 = guess * guess * guess * guess;
+    FfxFloat32x3 step   = guess * (guess4 - a) / (FfxFloat32(4.0) * guess4);
+    return guess - step;
+}
+
+/// Convert each component of a 3-component vector from Gamma2 to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// evaluated here as two nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
+{
+    FfxFloat32x3 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Convert each component of a 3-component vector from linear to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 3 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
+{
+    FfxUInt32x3 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast3(3u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast3(0x378D8723u));
+}
+
+/// Convert each component of a 3-component vector from linear to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^8 - a) / (8 * b^8).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 guess  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
+    FfxFloat32x3 guess8 = guess * guess * guess * guess * guess * guess * guess * guess;
+    FfxFloat32x3 step   = guess * (guess8 - a) / (FfxFloat32(8.0) * guess8);
+    return guess - step;
+}
+
+/// Convert each component of a 3-component vector from linear to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// evaluated here as three nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
+{
+    FfxFloat32x3 fourthRoot = ffxSqrt(ffxSqrt(a));
+    return ffxSqrt(fourthRoot);
+}
+
+/// Convert each component of a 4-component vector from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to Gamma2 amounts to a 4th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into Gamma2.
+///
+/// @returns The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 cubed = a * a * a;
+    return cubed * a;
+}
+
+/// Convert each component of a 4-component vector from PQ to linear space.
+///
+/// PQ is very close to x^(1/8), so going from PQ to linear amounts to an 8th power, which is
+/// evaluated here with plain multiplications. The fast PQ conversion family this belongs to
+/// has a maximum error of approximately 0.2%.
+///
+/// @param a                    The PQ value to convert into linear.
+///
+/// @returns The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
+{
+    FfxFloat32x4 fourthPower = a * a * a * a;
+    return fourthPower * a * a * a * a;
+}
+
+/// Convert each component of a 4-component vector from Gamma2 to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 2 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
+{
+    FfxUInt32x4 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast4(2u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast4(0x2F9A4E46u));
+}
+
+/// Convert each component of a 4-component vector from Gamma2 to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root. The fast
+/// estimate b is refined as b - b * (b^4 - a) / (4 * b^4), so b4 must hold the 4th power of b
+/// (not the 8th), matching the scalar, 2-component and 3-component overloads.
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
+    FfxFloat32x4 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// Convert each component of a 4-component vector from Gamma2 to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from Gamma2 to PQ amounts to a 4th root, which is
+/// evaluated here as two nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The Gamma2 value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
+{
+    FfxFloat32x4 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Convert each component of a 4-component vector from linear to PQ space using a fast estimate.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// estimated here by shifting ffxAsUInt32(a) right by 3 and adding a constant. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
+{
+    FfxUInt32x4 shiftedBits = ffxAsUInt32(a) >> ffxBroadcast4(3u);
+    return ffxAsFloat(shiftedBits + ffxBroadcast4(0x378D8723u));
+}
+
+/// Convert each component of a 4-component vector from linear to PQ space, with refinement.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root. The fast
+/// bit-shift estimate b is refined with one step of the form b - b * (b^8 - a) / (8 * b^8).
+/// The fast PQ conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 guess  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
+    FfxFloat32x4 guess8 = guess * guess * guess * guess * guess * guess * guess * guess;
+    FfxFloat32x4 step   = guess * (guess8 - a) / (FfxFloat32(8.0) * guess8);
+    return guess - step;
+}
+
+/// Convert each component of a 4-component vector from linear to PQ space using square roots.
+///
+/// PQ is very close to x^(1/8), so going from linear to PQ amounts to an 8th root, which is
+/// evaluated here as three nested ffxSqrt calls instead of a bit-level estimate. The fast PQ
+/// conversion family this belongs to has a maximum error of approximately 0.2%.
+///
+/// @param a                    The linear value to convert into PQ.
+///
+/// @returns The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
+{
+    FfxFloat32x4 fourthRoot = ffxSqrt(ffxSqrt(a));
+    return ffxSqrt(fourthRoot);
+}
+
+// Approximate sine with a parabola, evaluated as value * abs(value) - value.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] value            The value to calculate approximate sine for.
+//
+// @returns The approximate sine of <c><i>value</i></c>.
+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
+{
+    FfxFloat32 magnitude = abs(value);
+    return value * magnitude - value;
+}
+
+// Approximate sine per component with a parabola, evaluated as x * abs(x) - x.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x                The values to calculate approximate sine for.
+//
+// @returns The approximate sine of each component of <c><i>x</i></c>.
+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
+{
+    FfxFloat32x2 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+// Approximate cosine by remapping the input phase and reusing ffxParabolicSin.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x                The value to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of <c><i>x</i></c>.
+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
+{
+    FfxFloat32 wrapped = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    FfxFloat32 shifted = wrapped * FfxFloat32(2.0) - FfxFloat32(1.0);
+    return ffxParabolicSin(shifted);
+}
+
+// Approximate cosine per component by remapping the input phase and reusing ffxParabolicSin.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x                The values to calculate approximate cosine for.
+//
+// @returns
+// The approximate cosine of each component of <c><i>x</i></c>.
+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
+{
+    FfxFloat32x2 wrapped = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f));
+    FfxFloat32x2 shifted = wrapped * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f);
+    return ffxParabolicSin(shifted);
+}
+
+// Approximate sine and cosine together with a single 2-component ffxParabolicSin call.
+//
+// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+// is {-1/4 to 1/4} representing {-1 to 1}.
+//
+// @param [in] x                The value to calculate approximate sine and cosine for.
+//
+// @returns
+// A <c><i>FfxFloat32x2</i></c> with the sine approximation first and the cosine approximation second.
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
+{
+    FfxFloat32 wrapped  = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    FfxFloat32 cosPhase = wrapped * FfxFloat32(2.0) - FfxFloat32(1.0);
+    return ffxParabolicSin(FfxFloat32x2(x, cosPhase));
+}
+
+/// Conditional-free logic AND of two values, computed as their minimum.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation (for inputs limited to 0 and 1, the minimum is the AND).
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
+{
+    FfxUInt32 smaller = min(x, y);
+    return smaller;
+}
+
+/// Conditional-free logic AND of two 2-component values, computed as their per-component minimum.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation (for inputs limited to 0 and 1, the minimum is the AND).
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    FfxUInt32x2 smaller = min(x, y);
+    return smaller;
+}
+
+/// Conditional-free logic AND of two 3-component values, computed as their per-component minimum.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation (for inputs limited to 0 and 1, the minimum is the AND).
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    FfxUInt32x3 smaller = min(x, y);
+    return smaller;
+}
+
+/// Conditional-free logic AND of two 4-component values, computed as their per-component minimum.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns Result of the AND operation (for inputs limited to 0 and 1, the minimum is the AND).
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    FfxUInt32x4 smaller = min(x, y);
+    return smaller;
+}
+
+/// Conditional-free logic NOT of a single value; note it is exposed under the ffxZeroOneAnd name.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns Result of the NOT operation, computed as <c><i>x</i></c> XOR 1.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
+{
+    FfxUInt32 flipMask = FfxUInt32(1);
+    return x ^ flipMask;
+}
+
+/// Conditional-free logic NOT of a 2-component value; note it is exposed under the ffxZeroOneAnd name.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns Result of the NOT operation, computed per component as <c><i>x</i></c> XOR 1.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
+{
+    FfxUInt32x2 flipMask = ffxBroadcast2(1u);
+    return x ^ flipMask;
+}
+
+/// Conditional-free logic NOT of a 3-component value; note it is exposed under the ffxZeroOneAnd name.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns Result of the NOT operation, computed per component as <c><i>x</i></c> XOR 1.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
+{
+    FfxUInt32x3 flipMask = ffxBroadcast3(1u);
+    return x ^ flipMask;
+}
+
+/// Conditional-free logic NOT of a 4-component value; note it is exposed under the ffxZeroOneAnd name.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns Result of the NOT operation, computed per component as <c><i>x</i></c> XOR 1.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
+{
+    FfxUInt32x4 flipMask = ffxBroadcast4(1u);
+    return x ^ flipMask;
+}
+
+/// Conditional-free logic OR of two values, computed as their maximum.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns Result of the OR operation (for inputs limited to 0 and 1, the maximum is the OR).
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
+{
+    FfxUInt32 larger = max(x, y);
+    return larger;
+}
+
+/// Conditional-free logic OR of two 2-component values, computed as their per-component maximum.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns Result of the OR operation (for inputs limited to 0 and 1, the maximum is the OR).
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    FfxUInt32x2 larger = max(x, y);
+    return larger;
+}
+
+/// Conditional free logic OR operation using two 3-component values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when every component is 0 or 1).
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two 4-component values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when every component is 0 or 1).
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic NOT operation on an FfxFloat32 value, with the result converted to FfxUInt32.
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation (1.0 - x) converted to an unsigned integer.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
+{
+    return FfxUInt32(FfxFloat32(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on an FfxFloat32x2 value, with the result converted to FfxUInt32x2.
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation (1.0 - x per component) converted to unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
+{
+    return FfxUInt32x2(ffxBroadcast2(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on an FfxFloat32x3 value, with the result converted to FfxUInt32x3.
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation (1.0 - x per component) converted to unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
+{
+    return FfxUInt32x3(ffxBroadcast3(1.0) - x);
+}
+
+/// Conditional free logic NOT operation on an FfxFloat32x4 value, with the result converted to FfxUInt32x4.
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation (1.0 - x per component) converted to unsigned integers.
+///
+/// @ingroup GPUCore
+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
+{
+    return FfxUInt32x4(ffxBroadcast4(1.0) - x);
+}
+
+/// Conditional free logic AND operation using two values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR'ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, computed as ffxSaturate(x * y + z).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two 2-component values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR'ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, computed as ffxSaturate(x * y + z).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two 3-component values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR'ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, computed as ffxSaturate(x * y + z).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Conditional free logic AND operation using two 4-component values followed by an OR operation
+/// using the resulting value and a third value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR'ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, computed as ffxSaturate(x * y + z).
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return ffxSaturate(x * y + z);
+}
+
+/// Given a value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, computed as ffxSaturate(x * +INF).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
+{
+    return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a 2-component value, returns 1.0 per component if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, computed as ffxSaturate(x * +INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
+{
+    return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a 3-component value, returns 1.0 per component if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, computed as ffxSaturate(x * +INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
+{
+    return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Given a 4-component value, returns 1.0 per component if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, computed as ffxSaturate(x * +INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
+{
+    return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Conditional free logic NOT operation on a single FfxFloat32 value (one-argument overload of ffxZeroOneAnd).
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation, computed as 1.0 - x.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a single FfxFloat32x2 value (one-argument overload of ffxZeroOneAnd).
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation, computed as 1.0 - x per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a single FfxFloat32x3 value (one-argument overload of ffxZeroOneAnd).
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation, computed as 1.0 - x per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a single FfxFloat32x4 value (one-argument overload of ffxZeroOneAnd).
+///
+/// @param [in] x           The value to invert.
+///
+/// @returns
+/// Result of the NOT operation, computed as 1.0 - x per component.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
+{
+    return ffxBroadcast4(1.0) - x;
+}
+
+/// Conditional free logic OR operation using two FfxFloat32 values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when both inputs are 0.0 or 1.0).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x2 values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when every component is 0.0 or 1.0).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x3 values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when every component is 0.0 or 1.0).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation using two FfxFloat32x4 values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as max(x, y) (a logical OR when every component is 0.0 or 1.0).
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return max(x, y);
+}
+
+/// Choose between two FfxFloat32 values using a selector, evaluated without branching as x * y + ((-x) * z + z).
+///
+/// @param [in] x           The selector; 1.0 yields y and 0.0 yields z.
+/// @param [in] y           The value to return when the selector is 1.0.
+/// @param [in] z           The value to return when the selector is 0.0.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32x2 values using a selector, evaluated without branching as x * y + ((-x) * z + z).
+///
+/// @param [in] x           The per-component selector; 1.0 yields y and 0.0 yields z.
+/// @param [in] y           The value to return when the selector is 1.0.
+/// @param [in] z           The value to return when the selector is 0.0.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32x3 values using a selector, evaluated without branching as x * y + ((-x) * z + z).
+///
+/// @param [in] x           The per-component selector; 1.0 yields y and 0.0 yields z.
+/// @param [in] y           The value to return when the selector is 1.0.
+/// @param [in] z           The value to return when the selector is 0.0.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Choose between two FfxFloat32x4 values using a selector, evaluated without branching as x * y + ((-x) * z + z).
+///
+/// @param [in] x           The per-component selector; 1.0 yields y and 0.0 yields z.
+/// @param [in] y           The value to return when the selector is 1.0.
+/// @param [in] z           The value to return when the selector is 0.0.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 r = (-x) * z + z;
+    return x * y + r;
+}
+
+/// Given a value, returns 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison, computed as ffxSaturate(x * -INF).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
+{
+    return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a 2-component value, returns 1.0 per component if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison, computed as ffxSaturate(x * -INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
+{
+    return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a 3-component value, returns 1.0 per component if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison, computed as ffxSaturate(x * -INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
+{
+    return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Given a 4-component value, returns 1.0 per component if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison, computed as ffxSaturate(x * -INF).
+///
+/// @ingroup GPUCore
+FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
+{
+    return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// Compute a Rec.709 encoded value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a Rec.709 encoded value from a linear value (2-component overload).
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a Rec.709 encoded value from a linear value (3-component overload).
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.099, -0.099);
+    return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a gamma value from a linear value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: <c><i>power</i></c> is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] value           The value to convert to gamma space from linear.
+/// @param [in] power           The reciprocal of power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space, computed as pow(value, power).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power)
+{
+    return pow(value, FfxFloat32(power));
+}
+
+/// Compute a gamma value from a linear value (2-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: <c><i>power</i></c> is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] value           The value to convert to gamma space from linear.
+/// @param [in] power           The reciprocal of power value used for the gamma curve (applied to every component).
+///
+/// @returns
+/// A value in gamma space, computed as pow(value, power).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power)
+{
+    return pow(value, ffxBroadcast2(power));
+}
+
+/// Compute a gamma value from a linear value (3-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: <c><i>power</i></c> is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] value           The value to convert to gamma space from linear.
+/// @param [in] power           The reciprocal of power value used for the gamma curve (applied to every component).
+///
+/// @returns
+/// A value in gamma space, computed as pow(value, power).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power)
+{
+    return pow(value, ffxBroadcast3(power));
+}
+
+/// Compute a PQ value from a linear value (despite the function name, the direction is linear -> PQ).
+///
+/// @param [in] value           The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxPQToLinear(FfxFloat32 value)
+{
+    FfxFloat32 p = pow(value, FfxFloat32(0.159302));
+    return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438));
+}
+
+/// Compute a PQ value from a linear value (2-component overload; despite the name, the direction is linear -> PQ).
+///
+/// @param [in] value           The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 value)
+{
+    FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302));
+    return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438));
+}
+
+/// Compute a PQ value from a linear value (3-component overload; despite the name, the direction is linear -> PQ).
+///
+/// @param [in] value           The linear value to convert to PQ.
+///
+/// @returns
+/// A value in PQ space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 value)
+{
+    FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302));
+    return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438));
+}
+
+/// Compute a SRGB value from a linear value (despite the function name, the direction is linear -> SRGB).
+///
+/// @param [in] value           The linear value to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxSrgbToLinear(FfxFloat32 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a SRGB value from a linear value (2-component overload; despite the name, the direction is linear -> SRGB).
+///
+/// @param [in] value           The linear value to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a SRGB value from a linear value (3-component overload; despite the name, the direction is linear -> SRGB).
+///
+/// @param [in] value           The linear value to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); // clamp(v, lo, hi): constant j.x is v, the linear segment is lo, the power curve is hi
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color           The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the linear segment is chosen when color < 0.081 / 4.5 (= 0.018); the Rec.709 spec switches at an encoded 0.081 - confirm intended.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a REC.709 value (2-component overload).
+///
+/// @param [in] color           The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the linear segment is chosen when color < 0.081 / 4.5 (= 0.018); the Rec.709 spec switches at an encoded 0.081 - confirm intended.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a REC.709 value (3-component overload).
+///
+/// @param [in] color           The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the linear segment is chosen when color < 0.081 / 4.5 (= 0.018); the Rec.709 spec switches at an encoded 0.081 - confirm intended.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma space value to convert to linear.
+/// @param [in] power           The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space, computed as pow(color, power).
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
+{
+    return pow(color, FfxFloat32(power));
+}
+
+/// Compute a linear value from a value in a gamma space (2-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma space value to convert to linear.
+/// @param [in] power           The power value used for the gamma curve (applied to every component).
+///
+/// @returns
+/// A value in linear space, computed as pow(color, power).
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
+{
+    return pow(color, ffxBroadcast2(power));
+}
+
+/// Compute a linear value from a value in a gamma space (3-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma space value to convert to linear.
+/// @param [in] power           The power value used for the gamma curve (applied to every component).
+///
+/// @returns
+/// A value in linear space, computed as pow(color, power).
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
+{
+    return pow(color, ffxBroadcast3(power));
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// Inverts the PQ encoding curve: the exponents 0.0126833 and 6.27739 are approximately 1 / 78.8438 and 1 / 0.159302.
+///
+/// @param [in] value           The PQ space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromPQ(FfxFloat32 value)
+{
+    FfxFloat32 p = pow(value, FfxFloat32(0.0126833));
+    return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739));
+}
+
+/// Compute a linear value from a value in a PQ space (2-component overload).
+///
+/// Inverts the PQ encoding curve: the exponents 0.0126833 and 6.27739 are approximately 1 / 78.8438 and 1 / 0.159302.
+///
+/// @param [in] value           The PQ space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value)
+{
+    FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833));
+    return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739));
+}
+
+/// Compute a linear value from a value in a PQ space (3-component overload).
+///
+/// Inverts the PQ encoding curve: the exponents 0.0126833 and 6.27739 are approximately 1 / 78.8438 and 1 / 0.159302.
+///
+/// @param [in] value           The PQ space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value)
+{
+    FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833));
+    return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739));
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// NOTE(review): the linear segment is chosen when value < 0.04045 / 12.92; the sRGB spec switches at an encoded 0.04045 - confirm intended.
+///
+/// @param [in] value           The SRGB space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z));
+}
+
+/// Compute a linear value from a value in a SRGB space (2-component overload).
+///
+/// NOTE(review): the linear segment is chosen when value < 0.04045 / 12.92; the sRGB spec switches at an encoded 0.04045 - confirm intended.
+///
+/// @param [in] value           The SRGB space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz));
+}
+
+/// Compute a linear value from a value in a SRGB space (3-component overload).
+///
+/// NOTE(review): the linear segment is chosen when value < 0.04045 / 12.92; the sRGB spec switches at an encoded 0.04045 - confirm intended.
+///
+/// @param [in] value           The SRGB space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz));
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+/// The diagram shows which bits of <c><i>a</i></c> (numbered 5 down to 0) feed the x and y outputs:
+///  543210
+///  ======
+///  ..xxx.
+///  yy...y
+///
+/// @param [in] a       The input 1D coordinates to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
+{
+    return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+}
+
+/// A helper function performing the 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices (shown in hexadecimal) are remapped to 8x8 as follows:
+///
+///     00 01 08 09 10 11 18 19
+///     02 03 0a 0b 12 13 1a 1b
+///     04 05 0c 0d 14 15 1c 1d
+///     06 07 0e 0f 16 17 1e 1f
+///     20 21 28 29 30 31 38 39
+///     22 23 2a 2b 32 33 3a 3b
+///     24 25 2c 2d 34 35 3c 3d
+///     26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a       The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
+{
+    return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta
new file mode 100644
index 00000000..ec380d2d
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 05b921699d1374a429e32afca13137e2
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h
new file mode 100644
index 00000000..4c73daf1
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h
@@ -0,0 +1,2979 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#if FFX_HALF
+#if FFX_HLSL_SM >= 62
+/// A define value for 16bit positive infinity (bit pattern 0x7c00, explicitly typed as uint16_t).
+///
+/// @ingroup GPUCore
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u)
+
+/// A define value for 16bit negative infinity (bit pattern 0xfc00, explicitly typed as uint16_t).
+///
+/// @ingroup GPUCore
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u)
+#else
+/// A define value for 16bit positive infinity (bit pattern 0x7c00).
+///
+/// @ingroup GPUCore
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u)
+
+/// A define value for 16bit negative infinity (bit pattern 0xfc00).
+///
+/// @ingroup GPUCore
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u)
+#endif // #if FFX_HLSL_SM >= 62
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two signed 16bit integer values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two unsigned 16bit integer values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the component-wise min of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two signed 16bit integer values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two signed 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two unsigned 16bit integer values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the component-wise max of two unsigned 16bit integer vectors.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values, per component.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
+{
+    return pow(x, y);
+}
+
+/// Compute, per component, the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return pow(x, y);
+}
+
+/// Compute, per component, the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return pow(x, y);
+}
+
+/// Compute, per component, the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of <c><i>x</i></c> raised to the power <c><i>y</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSqrt(FfxFloat16 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the component-wise square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the component-wise square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the component-wise square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
+{
+    return sqrt(x);
+}
+
+/// Copy the sign bit (bit 15) from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into (expected positive: the sign bit is ORed in, not replaced).
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
+{
+    return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u)));
+}
+
+/// Copy the sign bit (bit 15) from each component of 's' to the matching positive component of 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into (expected positive: the sign bit is ORed in, not replaced).
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
+{
+    return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u)));
+}
+
+/// Copy the sign bit (bit 15) from each component of 's' to the matching positive component of 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into (expected positive: the sign bit is ORed in, not replaced).
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
+{
+    return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u)));
+}
+
+/// Copy the sign bit (bit 15) from each component of 's' to the matching positive component of 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into (expected positive: the sign bit is ORed in, not replaced).
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
+{
+    return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u)));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
+{
+    return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
+{
+    return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
+{
+    return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// A single operation to return the following, per component:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
+{
+    return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// A single operation (saturate(m * +INF)) to return the following:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
+{
+    return FfxFloat16(ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// A single operation (saturate(m * +INF)) to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
+{
+    return FfxFloat16x2(ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// A single operation (saturate(m * +INF)) to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
+{
+    return FfxFloat16x3(ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// A single operation (saturate(m * +INF)) to return the following, per component:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
+{
+    return FfxFloat16x4(ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// Convert the bit pattern of a 16bit floating point value to a sortable integer.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+///
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] x                       The bit pattern of the floating point value to make sortable.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
+{
+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert a sortable integer to the bit pattern of a 16bit floating point value.
+///
+/// The function behaves as follows:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] x                       The sortable integer value to make floating point.
+///
+/// @returns
+/// The bit pattern of the floating point value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
+{
+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert the bit patterns of a pair of 16bit floating point values to a pair of sortable integers.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+///
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] x                       The bit patterns of the floating point values to make sortable.
+///
+/// @returns
+/// The sortable integer values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
+{
+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
+}
+
+/// Convert a pair of sortable integers to the bit patterns of a pair of 16bit floating point values.
+///
+/// The function behaves as follows, per component:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] x                       The sortable integer values to make floating point.
+///
+/// @returns
+/// The bit patterns of the floating point values.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
+{
+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000));
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y0 [Zero] X0
+///
+/// @param [in] i                       The integer pair to pack; byte 0 (bits 0-7) of each component is used.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
+{
+    return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y1 [Zero] X1
+///
+/// @param [in] i                       The integer pair to pack; byte 1 (bits 8-15) of each component is used.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
+{
+    return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y2 [Zero] X2
+///
+/// @param [in] i                       The integer pair to pack; byte 2 (bits 16-23) of each component is used.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
+{
+    return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y3 [Zero] X3
+///
+/// @param [in] i                       The integer pair to pack; byte 3 (bits 24-31) of each component is used.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
+{
+    return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X0
+///
+/// @param [in] i                       The integer pair to pack; byte 0 of i.x replaces byte 0 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
+{
+    return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X2
+///
+/// @param [in] i                       The integer pair to pack; byte 2 of i.x replaces byte 0 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
+{
+    return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X0 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 0 of i.x replaces byte 1 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
+{
+    return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X2 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 2 of i.x replaces byte 1 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
+{
+    return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X0 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 0 of i.x replaces byte 2 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
+{
+    return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 2 of i.x replaces byte 2 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
+{
+    return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X0 Y2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 0 of i.x replaces byte 3 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
+{
+    return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X2 Y2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack; byte 2 of i.x replaces byte 3 of i.y.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
+{
+    return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 X2 Y0 X0
+///
+/// @param [in] i                       The integer pair to pack; bytes 0 and 2 of i.x stay in place, bytes 0 and 2 of i.y move up to bytes 1 and 3.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
+{
+    return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u);
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 Y0 X2 X0
+///
+/// @param [in] i                       The integer pair to pack; bytes 0 and 2 of i.x fill the low 16 bits, bytes 0 and 2 of i.y the high 16 bits.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPUCore
+FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
+{
+    return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u));
+}
+
+/// Takes two Float16x2 values x and y, scales them by 1.0/32768.0 and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x                       The first float16x2 value to pack.
+/// @param [in] y                       The second float16x2 value to pack.
+///
+/// @returns
+/// The packed FfxUInt16x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y)))));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],
+/// d.y[16:23] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte0Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed; it is scaled by 1.0/32768.0 and its bit pattern is used.
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0)));
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b)));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte1Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768 and convert the 16-bit pair into a single 32-bit word.
+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// r=ffxPermuteUByte2Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768 and convert the 16-bit pair into a single 32-bit word.
+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// r=ffxPermuteUByte3Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768 and convert the 16-bit pair into a single 32-bit word.
+    FfxFloat16x2 scaled = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(scaled));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair, convert to half, then undo the 1/32768 pack scale.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+    r *= FFX_BROADCAST_FLOAT16X2(32768.0);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair, convert to half, then undo the 1/32768 pack scale.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+    r *= FFX_BROADCAST_FLOAT16X2(32768.0);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair, convert to half, then undo the 1/32768 pack scale.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+    r *= FFX_BROADCAST_FLOAT16X2(32768.0);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair, convert to half, then undo the 1/32768 pack scale.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+    r *= FFX_BROADCAST_FLOAT16X2(32768.0);
+    return r;
+}
+
+/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x                       The first float16x2 value to pack.
+/// @param [in] y                       The second float16x2 value to pack.
+///
+/// @returns
+/// The packed FfxUInt16x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    // Signed variant: same 1/32768 scale as the unsigned pack, plus a 0.25/32768 bias.
+    FfxFloat16x2 scale = FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0);
+    FfxFloat16x2 bias  = FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    x = x * scale + bias;
+    y = y * scale + bias;
+
+    // Turn each pair into one 32-bit word, then let ffxPackBytesY2X2Y0X0 select the output bytes.
+    FfxUInt32 xWord = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x));
+    FfxUInt32 yWord = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y));
+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(xWord, yWord)));
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
+/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
+/// d.y[0:7] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Zero-based encoding: toggle bit 7 of the low byte in each 16-bit half.
+    word ^= 0x00800080u;
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[0:7] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Zero-based encoding: toggle bit 7 of the low byte in each 16-bit half.
+    word ^= 0x00800080u;
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[0:7] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Zero-based encoding: toggle bit 7 of the low byte in each 16-bit half.
+    word ^= 0x00800080u;
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[0:7] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPUCore
+FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    // Scale by 1/32768, add the 0.25/32768 signed bias, then convert the pair into one 32-bit word.
+    FfxFloat16x2 biased = i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0);
+    FfxUInt32 word = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(biased));
+
+    // Zero-based encoding: toggle bit 7 of the low byte in each 16-bit half.
+    word ^= 0x00800080u;
+
+    // Each output component is a byte permutation of one component of d and the converted word.
+    FfxUInt32x2 r;
+    r.x = ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, word));
+    r.y = ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, word));
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair and convert to half.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair and convert to half.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair and convert to half.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes into a 16-bit pair and convert to half.
+    FfxUInt16x2 selected = FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i));
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(selected);
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values. Zero-based: bit 7 of each extracted byte is flipped (XOR with 0x00800080) before conversion.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes, then toggle bit 7 of each one (zero-based encoding).
+    FfxUInt32 word = ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u;
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(word));
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values. Zero-based: bit 7 of each extracted byte is flipped (XOR with 0x00800080) before conversion.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes, then toggle bit 7 of each one (zero-based encoding).
+    FfxUInt32 word = ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u;
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(word));
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values. Zero-based: bit 7 of each extracted byte is flipped (XOR with 0x00800080) before conversion.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes, then toggle bit 7 of each one (zero-based encoding).
+    FfxUInt32 word = ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u;
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(word));
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values. Zero-based: bit 7 of each extracted byte is flipped (XOR with 0x00800080) before conversion.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    // Gather the selected bytes, then toggle bit 7 of each one (zero-based encoding).
+    FfxUInt32 word = ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u;
+    FfxFloat16x2 r = FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(word));
+
+    // Undo the signed pack: rescale by 32768 and remove the 0.25 bias.
+    r = r * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25);
+    return r;
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
+{
+    // Integer trick on the 16-bit representation: shift right by one, then add the magic constant 0x1de2.
+    FfxUInt16 bits = FFX_TO_UINT16(a);
+    bits = (bits >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2);
+    return FFX_TO_FLOAT16(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
+{
+    // Integer trick on the 16-bit representation: shift right by one, then add the magic constant 0x1de2.
+    FfxUInt16x2 bits = FFX_TO_UINT16X2(a);
+    bits = (bits >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2);
+    return FFX_TO_FLOAT16X2(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
+{
+    // Integer trick on the 16-bit representation: shift right by one, then add the magic constant 0x1de2.
+    FfxUInt16x3 bits = FFX_TO_UINT16X3(a);
+    bits = (bits >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2);
+    return FFX_TO_FLOAT16X3(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
+{
+    // Integer trick: subtract the 16-bit representation of a from the magic constant 0x7784.
+    FfxUInt16 bits = FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a);
+    return FFX_TO_FLOAT16(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
+{
+    // Integer trick: subtract the 16-bit representation of a from the magic constant 0x7784.
+    FfxUInt16x2 bits = FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a);
+    return FFX_TO_FLOAT16X2(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
+{
+    // Integer trick: subtract the 16-bit representation of a from the magic constant 0x7784.
+    FfxUInt16x3 bits = FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a);
+    return FFX_TO_FLOAT16X3(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
+{
+    // Integer trick: subtract the 16-bit representation of a from the magic constant 0x7784.
+    FfxUInt16x4 bits = FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a);
+    return FFX_TO_FLOAT16X4(bits);
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
+{
+    // Initial estimate from the integer trick (magic constant 0x778d).
+    FfxUInt16 bits = FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a);
+    FfxFloat16 estimate = FFX_TO_FLOAT16(bits);
+
+    // One refinement step of the form e * (2 - e * a).
+    return estimate * (-estimate * a + FFX_BROADCAST_FLOAT16(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
+{
+    // Initial estimate from the integer trick (magic constant 0x778d).
+    FfxUInt16x2 bits = FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a);
+    FfxFloat16x2 estimate = FFX_TO_FLOAT16X2(bits);
+
+    // One refinement step of the form e * (2 - e * a).
+    return estimate * (-estimate * a + FFX_BROADCAST_FLOAT16X2(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
+{
+    // Initial estimate from the integer trick (magic constant 0x778d).
+    FfxUInt16x3 bits = FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a);
+    FfxFloat16x3 estimate = FFX_TO_FLOAT16X3(bits);
+
+    // One refinement step of the form e * (2 - e * a).
+    return estimate * (-estimate * a + FFX_BROADCAST_FLOAT16X3(2.0));
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
+{
+    // Initial estimate from the integer trick (magic constant 0x778d).
+    FfxUInt16x4 bits = FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a);
+    FfxFloat16x4 estimate = FFX_TO_FLOAT16X4(bits);
+
+    // One refinement step of the form e * (2 - e * a).
+    return estimate * (-estimate * a + FFX_BROADCAST_FLOAT16X4(2.0));
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
+{
+    // Integer trick: halve the 16-bit representation and subtract it from the magic constant 0x59a3.
+    FfxUInt16 halved = FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1);
+    FfxUInt16 bits = FFX_BROADCAST_UINT16(0x59a3) - halved;
+    return FFX_TO_FLOAT16(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
+{
+    // Integer trick: halve the 16-bit representation and subtract it from the magic constant 0x59a3.
+    FfxUInt16x2 halved = FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1);
+    FfxUInt16x2 bits = FFX_BROADCAST_UINT16X2(0x59a3) - halved;
+    return FFX_TO_FLOAT16X2(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
+{
+    // Integer trick: halve the 16-bit representation and subtract it from the magic constant 0x59a3.
+    FfxUInt16x3 halved = FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1);
+    FfxUInt16x3 bits = FFX_BROADCAST_UINT16X3(0x59a3) - halved;
+    return FFX_TO_FLOAT16X3(bits);
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
+{
+    // Integer trick: halve the 16-bit representation and subtract it from the magic constant 0x59a3.
+    FfxUInt16x4 halved = FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1);
+    FfxUInt16x4 bits = FFX_BROADCAST_UINT16X4(0x59a3) - halved;
+    return FFX_TO_FLOAT16X4(bits);
+}
+
+/// An approximation of sine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of <c><i>x</i></c>.
+FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x)
+{
+    // Parabola x*|x| - x: zero at x = -1, 0 and 1.
+    FfxFloat16 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+/// An approximation of sine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of <c><i>x</i></c>.
+FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x)
+{
+    // Parabola x*|x| - x applied per component: zero at x = -1, 0 and 1.
+    FfxFloat16x2 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of <c><i>x</i></c>.
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x)
+{
+    // Shift the phase and wrap it with ffxFract into [0, 1).
+    FfxFloat16 phase = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
+
+    // Expand to [-1, 1) so the sine approximation can be reused.
+    phase = phase * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(phase);
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of <c><i>value</i></c>.
+FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x)
+{
+    x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75));
+    x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0);
+    return ffxParabolicSinHalf(x);
+}
+
+/// Parabolic approximation of both sine and cosine in a single two-component evaluation.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate the approximate sine and cosine for.
+///
+/// @returns
+/// A <c><i>FfxFloat16x2</i></c> holding the approximate sine of <c><i>x</i></c> in .x and the approximate cosine in .y.
+FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
+{
+    FfxFloat16 wrapped = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75));
+    FfxFloat16 cosArg = wrapped * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(FfxFloat16x2(x, cosArg));
+}
+
+/// Conditional free logic AND operation on two 16-bit unsigned integer values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 2-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 3-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 4-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic NOT operation on a 16-bit unsigned integer value.
+/// Implemented as an exclusive OR with 1.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
+{
+    return x ^ FFX_BROADCAST_UINT16(1);
+}
+
+/// Conditional free logic NOT operation on a 2-component 16-bit unsigned integer vector.
+/// Implemented per component as an exclusive OR with 1.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X2(1);
+}
+
+/// Conditional free logic NOT operation on a 3-component 16-bit unsigned integer vector.
+/// Implemented per component as an exclusive OR with 1.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X3(1);
+}
+
+/// Conditional free logic NOT operation on a 4-component 16-bit unsigned integer vector.
+/// Implemented per component as an exclusive OR with 1.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X4(1);
+}
+
+/// Conditional free logic OR operation on two 16-bit unsigned integer values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 2-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 3-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 4-component 16-bit unsigned integer vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Convert a half-precision float value between 0.0 and 1.0 to a 16-bit unsigned integer.
+/// NOTE(review): assumes the FFX_TO_* macros reinterpret bits rather than convert values -- confirm.
+/// @param [in] x           The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
+{
+    return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1)));
+}
+
+/// Convert a 2-component half-precision float vector between 0.0 and 1.0 to 16-bit unsigned integers.
+/// NOTE(review): assumes the FFX_TO_* macros reinterpret bits rather than convert values -- confirm.
+/// @param [in] x           The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
+{
+    return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1)));
+}
+
+/// Convert a 3-component half-precision float vector between 0.0 and 1.0 to 16-bit unsigned integers.
+/// NOTE(review): assumes the FFX_TO_* macros reinterpret bits rather than convert values -- confirm.
+/// @param [in] x           The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
+{
+    return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1)));
+}
+
+/// Convert a 4-component half-precision float vector between 0.0 and 1.0 to 16-bit unsigned integers.
+/// NOTE(review): assumes the FFX_TO_* macros reinterpret bits rather than convert values -- confirm.
+/// @param [in] x           The value to be converted to a Uint.
+///
+/// @returns
+/// The converted Uint value.
+///
+/// @ingroup GPUCore
+FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
+{
+    return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1)));
+}
+
+/// Convert a 16-bit unsigned integer value between 0 and 1 to a half-precision float.
+/// The input is multiplied by FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)) before being passed to FFX_TO_FLOAT16.
+/// @param [in] x           The value to be converted to a half-precision float.
+///
+/// @returns
+/// The converted half-precision float value.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
+{
+    return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)));
+}
+
+/// Convert a 2-component 16-bit unsigned integer vector between 0 and 1 to half-precision floats.
+/// Each component is multiplied by the bit pattern of 1.0, as the scalar ffxZeroOneUint16ToFloat16 does.
+/// @param [in] x           The value to be converted to half-precision floats.
+///
+/// @returns
+/// The converted half-precision float value.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
+{
+    return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxFloat16x2(1.0, 1.0))); // float vector, so FFX_TO_UINT16X2 sees the bits of 1.0
+}
+
+/// Convert a 3-component 16-bit unsigned integer vector between 0 and 1 to half-precision floats.
+/// Each component is multiplied by the bit pattern of 1.0, as the scalar ffxZeroOneUint16ToFloat16 does.
+/// @param [in] x           The value to be converted to half-precision floats.
+///
+/// @returns
+/// The converted half-precision float value.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
+{
+    return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxFloat16x3(1.0, 1.0, 1.0))); // float vector, so FFX_TO_UINT16X3 sees the bits of 1.0
+}
+
+/// Convert a 4-component 16-bit unsigned integer vector between 0 and 1 to half-precision floats.
+/// Each component is multiplied by the bit pattern of 1.0, as the scalar ffxZeroOneUint16ToFloat16 does.
+/// @param [in] x           The value to be converted to half-precision floats.
+///
+/// @returns
+/// The converted half-precision float value.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
+{
+    return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxFloat16x4(1.0, 1.0, 1.0, 1.0))); // float vector, so FFX_TO_UINT16X4 sees the bits of 1.0
+}
+
+/// Conditional free logic AND operation on two half-precision float values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 2-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 3-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic AND operation on two 4-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, computed per component as <c>min(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return min(x, y);
+}
+
+/// Conditional free logic operation evaluating <c>1.0 - x * y</c> on two half-precision values.
+/// NOTE(review): the name says "AndOr", but for 0.0/1.0 operands this is NOT (x AND y) -- confirm intent.
+///
+/// @param [in] x           The first value to be fed into the operator.
+/// @param [in] y           The second value to be fed into the operator.
+///
+/// @returns
+/// The value <c>(-x) * y + 1.0</c>.
+/// @ingroup GPUCore
+FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16(1.0);
+}
+
+/// Conditional free logic operation evaluating <c>1.0 - x * y</c> per component on two 2-component vectors.
+/// NOTE(review): the name says "AndOr", but for 0.0/1.0 operands this is NOT (x AND y) -- confirm intent.
+///
+/// @param [in] x           The first value to be fed into the operator.
+/// @param [in] y           The second value to be fed into the operator.
+///
+/// @returns
+/// The value <c>(-x) * y + 1.0</c>.
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0);
+}
+
+/// Conditional free logic operation evaluating <c>1.0 - x * y</c> per component on two 3-component vectors.
+/// NOTE(review): the name says "AndOr", but for 0.0/1.0 operands this is NOT (x AND y) -- confirm intent.
+///
+/// @param [in] x           The first value to be fed into the operator.
+/// @param [in] y           The second value to be fed into the operator.
+///
+/// @returns
+/// The value <c>(-x) * y + 1.0</c>.
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0);
+}
+
+/// Conditional free logic operation evaluating <c>1.0 - x * y</c> per component on two 4-component vectors.
+/// NOTE(review): the name says "AndOr", but for 0.0/1.0 operands this is NOT (x AND y) -- confirm intent.
+///
+/// @param [in] x           The first value to be fed into the operator.
+/// @param [in] y           The second value to be fed into the operator.
+///
+/// @returns
+/// The value <c>(-x) * y + 1.0</c>.
+/// @ingroup GPUCore
+FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0);
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation, computed as the saturation of <c>x * y + z</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return FfxFloat16(ffxSaturate(x * y + z));
+}
+
+/// Conditional free logic AND operation using two 2-component half-precision vectors followed by
+/// an OR operation using the resulting value and a third half-precision vector.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation, computed per component as the saturation of <c>x * y + z</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return FfxFloat16x2(ffxSaturate(x * y + z));
+}
+
+/// Conditional free logic AND operation using two 3-component half-precision vectors followed by
+/// an OR operation using the resulting value and a third half-precision vector.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation, computed per component as the saturation of <c>x * y + z</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return FfxFloat16x3(ffxSaturate(x * y + z));
+}
+
+/// Conditional free logic AND operation using two 4-component half-precision vectors followed by
+/// an OR operation using the resulting value and a third half-precision vector.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operator.
+///
+/// @returns
+/// Result of the AND OR operation, computed per component as the saturation of <c>x * y + z</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return FfxFloat16x4(ffxSaturate(x * y + z));
+}
+
+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_POSITIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
+{
+    return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// Given a 2-component half-precision vector, returns per component 1.0 if greater than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_POSITIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
+{
+    return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// Given a 3-component half-precision vector, returns per component 1.0 if greater than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_POSITIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
+{
+    return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// Given a 4-component half-precision vector, returns per component 1.0 if greater than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_POSITIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
+{
+    return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)));
+}
+
+/// Conditional free logic NOT operation on a half-precision float value.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, computed as <c>1.0 - x</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
+{
+    return FFX_BROADCAST_FLOAT16(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a 2-component half-precision float vector.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, computed per component as <c>1.0 - x</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
+{
+    return FFX_BROADCAST_FLOAT16X2(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a 3-component half-precision float vector.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, computed per component as <c>1.0 - x</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
+{
+    return FFX_BROADCAST_FLOAT16X3(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on a 4-component half-precision float vector.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, computed per component as <c>1.0 - x</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
+{
+    return FFX_BROADCAST_FLOAT16X4(1.0) - x;
+}
+
+/// Conditional free logic OR operation on two half-precision float values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 2-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 3-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return max(x, y);
+}
+
+/// Conditional free logic OR operation on two 4-component half-precision float vectors.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, computed per component as <c>max(x, y)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return max(x, y);
+}
+
+/// Select between two half-precision values with a selector, without branching.
+///
+/// @param [in] x           The selector; presumably 0.0 or 1.0 (per the ZeroOne naming).
+/// @param [in] y           The value weighted by the selector.
+/// @param [in] z           The value weighted by one minus the selector.
+///
+/// @returns
+/// The selected value, computed as <c>x * y + ((-x) * z + z)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    FfxFloat16 zWeighted = (-x) * z + z;
+    return x * y + zWeighted;
+}
+
+/// Select per component between two 2-component half-precision vectors with a selector, without branching.
+///
+/// @param [in] x           The selector; presumably 0.0 or 1.0 per component (per the ZeroOne naming).
+/// @param [in] y           The value weighted by the selector.
+/// @param [in] z           The value weighted by one minus the selector.
+///
+/// @returns
+/// The selected value, computed as <c>x * y + ((-x) * z + z)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    FfxFloat16x2 zWeighted = (-x) * z + z;
+    return x * y + zWeighted;
+}
+
+/// Select per component between two 3-component half-precision vectors with a selector, without branching.
+///
+/// @param [in] x           The selector; presumably 0.0 or 1.0 per component (per the ZeroOne naming).
+/// @param [in] y           The value weighted by the selector.
+/// @param [in] z           The value weighted by one minus the selector.
+///
+/// @returns
+/// The selected value, computed as <c>x * y + ((-x) * z + z)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    FfxFloat16x3 zWeighted = (-x) * z + z;
+    return x * y + zWeighted;
+}
+
+/// Select per component between two 4-component half-precision vectors with a selector, without branching.
+///
+/// @param [in] x           The selector; presumably 0.0 or 1.0 per component (per the ZeroOne naming).
+/// @param [in] y           The value weighted by the selector.
+/// @param [in] z           The value weighted by one minus the selector.
+///
+/// @returns
+/// The selected value, computed as <c>x * y + ((-x) * z + z)</c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    FfxFloat16x4 zWeighted = (-x) * z + z;
+    return x * y + zWeighted;
+}
+
+/// Given a half-precision value, returns 1.0 if less than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_NEGATIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
+{
+    return FfxFloat16(ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// Given a 2-component half-precision vector, returns per component 1.0 if less than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_NEGATIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
+{
+    return FfxFloat16x2(ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// Given a 3-component half-precision vector, returns per component 1.0 if less than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_NEGATIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
+{
+    return FfxFloat16x3(ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// Given a 4-component half-precision vector, returns per component 1.0 if less than zero and 0.0 if not.
+/// Computed as the saturation of x * FFX_NEGATIVE_INFINITY_HALF. NOTE(review): x == 0.0 relies on ffxSaturate's NaN handling -- confirm.
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the less than zero comparison.
+///
+/// @ingroup GPUCore
+FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
+{
+    return FfxFloat16x4(ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)));
+}
+
+/// Compute a Rec.709 encoded value from a linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the breakpoint (0.018 * 4.5), the linear term (4.5 * c) and the power term.
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns
+/// The <c><i>c</i></c> value encoded in Rec.709 space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    return max(min(j.x, c * j.y), min(max(j.x, c * j.y), pow(c, j.z) * k.x + k.y)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute Rec.709 encoded values from a 2-component linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the breakpoint (0.018 * 4.5), the linear term (4.5 * c) and the power term.
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns
+/// The <c><i>c</i></c> value encoded in Rec.709 space, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    return max(min(j.xx, c * j.yy), min(max(j.xx, c * j.yy), pow(c, j.zz) * k.xx + k.yy)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute Rec.709 encoded values from a 3-component linear value.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+/// The result is the median of the breakpoint (0.018 * 4.5), the linear term (4.5 * c) and the power term.
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns
+/// The <c><i>c</i></c> value encoded in Rec.709 space, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.099, -0.099);
+    return max(min(j.xxx, c * j.yyy), min(max(j.xxx, c * j.yyy), pow(c, j.zzz) * k.xxx + k.yyy)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute a gamma-encoded value from a linear value as <c>pow(c, rcpX)</c>.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16(rcpX));
+}
+
+/// Compute gamma-encoded values from a 2-component linear value as <c>pow(c, rcpX)</c> per component.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX));
+}
+
+/// Compute gamma-encoded values from a 3-component linear value as <c>pow(c, rcpX)</c> per component.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// A value in gamma space.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX));
+}
+
+/// Compute an SRGB value from a linear value.
+/// The result is the median of the breakpoint (0.0031308 * 12.92), the linear term (12.92 * c) and the power term.
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    return max(min(j.x, c * j.y), min(max(j.x, c * j.y), pow(c, j.z) * k.x + k.y)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute SRGB values from a 2-component linear value.
+/// The result is the median of the breakpoint (0.0031308 * 12.92), the linear term (12.92 * c) and the power term.
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    return max(min(j.xx, c * j.yy), min(max(j.xx, c * j.yy), pow(c, j.zz) * k.xx + k.yy)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute SRGB values from a 3-component linear value.
+/// The result is the median of the breakpoint (0.0031308 * 12.92), the linear term (12.92 * c) and the power term.
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns
+/// A value in SRGB space, per component.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat16x2 k = FfxFloat16x2(1.055, -0.055);
+    return max(min(j.xxx, c * j.yyy), min(max(j.xxx, c * j.yyy), pow(c, j.zzz) * k.xxx + k.yyy)); // explicit median: clamp() bounds are not ordered here
+}
+
+/// Compute the square root of a half-precision value using the <c>sqrt</c> intrinsic.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns
+/// A square root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
+{
+    return sqrt(c);
+}
+
+/// Compute the per-component square root of a 2-component half-precision vector using the <c>sqrt</c> intrinsic.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns
+/// A square root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
+{
+    return sqrt(c);
+}
+
+/// Compute the per-component square root of a 3-component half-precision vector using the <c>sqrt</c> intrinsic.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns
+/// A square root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
+{
+    return sqrt(c);
+}
+
+/// Compute the cube root of a half-precision value as <c>pow(c, 1.0 / 3.0)</c>.
+/// NOTE(review): pow with a negative base is typically undefined in shading languages -- confirm callers pass c >= 0.
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns
+/// A cube root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0));
+}
+
+/// Compute the per-component cube root of a 2-component half-precision vector as <c>pow(c, 1.0 / 3.0)</c>.
+/// NOTE(review): pow with a negative base is typically undefined in shading languages -- confirm callers pass c >= 0.
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns
+/// A cube root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0));
+}
+
+/// Compute the per-component cube root of a 3-component half-precision vector as <c>pow(c, 1.0 / 3.0)</c>.
+/// NOTE(review): pow with a negative base is typically undefined in shading languages -- confirm callers pass c >= 0.
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns
+/// A cube root of the input value.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0));
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] c           The value to convert to linear from REC.709.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
+{
+    FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z));
+}
+
+/// Convert a 2-component REC.709 encoded value to linear.
+///
+/// @param [in] c           The REC.709 encoded vector to convert.
+///
+/// @returns
+/// Either c / 4.5 or ((c + 0.099) / 1.099)^(1 / 0.45), as chosen by ffxZeroOneSelectHalf from the sign test on (c - cut-over).
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
+{
+    FfxFloat16x2 affine = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);  // x: scale, y: offset applied before pow
+    FfxFloat16x3 curve = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);  // x: cut-over, y: slope, z: exponent. NOTE(review): cut-over 0.081 / 4.5 is compared against the encoded input - confirm intended domain
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - curve.xx), c * curve.yy, pow(c * affine.xx + affine.yy, curve.zz));
+}
+
+/// Convert a 3-component REC.709 encoded value to linear.
+///
+/// @param [in] c           The REC.709 encoded vector to convert.
+///
+/// @returns
+/// Either c / 4.5 or ((c + 0.099) / 1.099)^(1 / 0.45), as chosen by ffxZeroOneSelectHalf from the sign test on (c - cut-over).
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
+{
+    FfxFloat16x2 affine = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);  // x: scale, y: offset applied before pow
+    FfxFloat16x3 curve = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);  // x: cut-over, y: slope, z: exponent. NOTE(review): cut-over 0.081 / 4.5 is compared against the encoded input - confirm intended domain
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - curve.xxx), c * curve.yyy, pow(c * affine.xxx + affine.yyy, curve.zzz));
+}
+
+/// Compute a linear value from a scalar gamma-space value by raising it to the power <c><i>x</i></c>.
+///
+/// Typical values of <c><i>x</i></c>: 2.2 for some PC displays, 2.4-2.5 for CRTs, or 2.2 for FreeSync2 native.
+///
+/// @param [in] c           The gamma-space value to convert to linear.
+/// @param [in] x           The power value used for the gamma curve (the exponent passed to <c><i>pow</i></c>).
+///
+/// @returns
+/// <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16(x));
+}
+
+/// Compute a linear value from a 2-component gamma-space value by raising it to the power <c><i>x</i></c>.
+///
+/// Typical values of <c><i>x</i></c>: 2.2 for some PC displays, 2.4-2.5 for CRTs, or 2.2 for FreeSync2 native.
+///
+/// @param [in] c           The gamma-space vector to convert to linear.
+/// @param [in] x           The scalar power value used for the gamma curve; the same exponent is used for every component.
+///
+/// @returns
+/// <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X2(x));
+}
+
+/// Compute a linear value from a 3-component gamma-space value by raising it to the power <c><i>x</i></c>.
+///
+/// Typical values of <c><i>x</i></c>: 2.2 for some PC displays, 2.4-2.5 for CRTs, or 2.2 for FreeSync2 native.
+///
+/// @param [in] c           The gamma-space vector to convert to linear.
+/// @param [in] x           The scalar power value used for the gamma curve; the same exponent is used for every component.
+///
+/// @returns
+/// <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
+{
+    return pow(c, FFX_BROADCAST_FLOAT16X3(x));
+}
+
+/// Convert a scalar sRGB encoded value to linear.
+///
+/// Builds two candidates, c / 12.92 and ((c + 0.055) / 1.055)^2.4, and lets ffxZeroOneSelectHalf pick one from the sign test on (c - cut-over).
+///
+/// @param [in] c           The sRGB encoded value to convert to linear.
+///
+/// @returns
+/// The candidate chosen by ffxZeroOneSelectHalf, a value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
+{
+    FfxFloat16x2 affine = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);  // x: scale, y: offset applied before pow
+    FfxFloat16x3 curve = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // x: cut-over, y: slope, z: exponent. NOTE(review): cut-over 0.04045 / 12.92 is compared against the encoded input - confirm intended domain
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - curve.x), c * curve.y, pow(c * affine.x + affine.y, curve.z));
+}
+
+/// Convert a 2-component sRGB encoded value to linear.
+///
+/// Builds two candidates, c / 12.92 and ((c + 0.055) / 1.055)^2.4, and lets ffxZeroOneSelectHalf pick one from the sign test on (c - cut-over).
+///
+/// @param [in] c           The sRGB encoded vector to convert to linear.
+///
+/// @returns
+/// The candidate chosen by ffxZeroOneSelectHalf, a value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x2 affine = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);  // x: scale, y: offset applied before pow
+    FfxFloat16x3 curve = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // x: cut-over, y: slope, z: exponent. NOTE(review): cut-over 0.04045 / 12.92 is compared against the encoded input - confirm intended domain
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - curve.xx), c * curve.yy, pow(c * affine.xx + affine.yy, curve.zz));
+}
+
+/// Convert a 3-component sRGB encoded value to linear.
+///
+/// Builds two candidates, c / 12.92 and ((c + 0.055) / 1.055)^2.4, and lets ffxZeroOneSelectHalf pick one from the sign test on (c - cut-over).
+///
+/// @param [in] c           The sRGB encoded vector to convert to linear.
+///
+/// @returns
+/// The candidate chosen by ffxZeroOneSelectHalf, a value in linear space.
+///
+/// @ingroup GPUCore
+FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x2 affine = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);  // x: scale, y: offset applied before pow
+    FfxFloat16x3 curve = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // x: cut-over, y: slope, z: exponent. NOTE(review): cut-over 0.04045 / 12.92 is compared against the encoded input - confirm intended domain
+    return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - curve.xxx), c * curve.yyy, pow(c * affine.xxx + affine.yyy, curve.zzz));
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+///  543210   <- bit index of the input a
+///  ======
+///  ..xxx.   <- x is bits 1..3 of a
+///  yy...y   <- y is bit 0 of a (low bit) with bits 4..5 of a above it
+///
+/// @param [in] a       The input 1D coordinates to remap (only the low 6 bits are read).
+///
+/// @returns
+/// The remapped 2D coordinates, packed as (x, y) into an FfxUInt16x2.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
+{
+    return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+}
+
+/// A helper function performing a 64x1 to 8x8 remapping, which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices (shown in hex) are laid out in the 8x8 grid as follows:
+///
+///     00 01 08 09 10 11 18 19
+///     02 03 0a 0b 12 13 1a 1b
+///     04 05 0c 0d 14 15 1c 1d
+///     06 07 0e 0f 16 17 1e 1f
+///     20 21 28 29 30 31 38 39
+///     22 23 2a 2b 32 33 3a 3b
+///     24 25 2c 2d 34 35 3c 3d
+///     26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a       The input 1D coordinate to remap (only the low 6 bits are read).
+///
+/// @returns
+/// The remapped 2D coordinates: x = bit 0 and bits 3..4 of a, y = bits 1..2 and bit 5 of a.
+///
+/// @ingroup GPUCore
+FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a)
+{
+    return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+}
+
+#endif  // FFX_HALF
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta
new file mode 100644
index 00000000..e78eec42
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_gpu_common_half.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 1bdb323791a91a5438ee8e1e63187840
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h
new file mode 100644
index 00000000..337eb06f
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h
@@ -0,0 +1,1651 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// @defgroup HLSLCore HLSL Core
+/// HLSL core defines and functions
+///
+/// @ingroup FfxHLSL
+// Register binding helpers: paste the index onto t (SRV), u (UAV) or b (CB) and wrap the result in register(...).
+#define DECLARE_SRV_REGISTER(regIndex) t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex) u##regIndex
+#define DECLARE_CB_REGISTER(regIndex)  b##regIndex
+#define FFX_DECLARE_SRV(regIndex)   register(DECLARE_SRV_REGISTER(regIndex))
+#define FFX_DECLARE_UAV(regIndex)   register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_DECLARE_CB(regIndex)    register(DECLARE_CB_REGISTER(regIndex))
+
+/// A define for abstracting shared memory between shading languages; expands to the HLSL <c><i>groupshared</i></c> keyword.
+///
+/// @ingroup HLSLCore
+#define FFX_GROUPSHARED groupshared
+
+/// A define for abstracting compute memory barriers between shading languages; expands to the function name only, so call sites supply the parentheses.
+///
+/// @ingroup HLSLCore
+#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync
+
+/// A define for abstracting compute atomic additions between shading languages; expands to <c><i>InterlockedAdd(x, y)</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y)
+
+/// A define added to accept static markup on functions to aid CPU/GPU portability of code; expands to <c><i>static</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_STATIC static
+
+/// A define for abstracting loop unrolling between shading languages; expands to the <c><i>[unroll]</i></c> attribute.
+///
+/// @ingroup HLSLCore
+#define FFX_UNROLL [unroll]
+
+/// A define for abstracting a 'greater than' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_GREATER_THAN(x, y) ((x) > (y))
+
+/// A define for abstracting a 'greater than or equal' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))
+
+/// A define for abstracting a 'less than' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_LESS_THAN(x, y) ((x) < (y))
+
+/// A define for abstracting a 'less than or equal' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))
+
+/// A define for abstracting an 'equal' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_EQUAL(x, y) ((x) == (y))
+
+/// A define for abstracting a 'not equal' comparison operator between two types. Arguments and result are parenthesized so the expansion is precedence-safe.
+///
+/// @ingroup HLSLCore
+#define FFX_NOT_EQUAL(x, y) ((x) != (y))
+
+/// A define for abstracting matrix multiply operations between shading languages; expands to <c><i>mul(a, b)</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b)
+
+/// A define for abstracting vector transformations between shading languages; expands to <c><i>mul(a, b)</i></c>, the same as FFX_MATRIX_MULTIPLY.
+///
+/// @ingroup HLSLCore
+#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b)
+
+/// A define for abstracting modulo operations between shading languages; expands to the floating-point <c><i>fmod(a, b)</i></c> intrinsic.
+///
+/// @ingroup HLSLCore
+#define FFX_MODULO(a, b) (fmod(a, b))
+
+/// Broadcast a scalar value to a 1-dimensional floating point vector. Expands to a scalar <c><i>FfxFloat32</i></c> cast.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector. Expands to a scalar cast only; presumably relies on HLSL scalar-to-vector promotion at the use site - confirm.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector. Expands to a scalar cast only (see FFX_BROADCAST_FLOAT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector. Expands to a scalar cast only (see FFX_BROADCAST_FLOAT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector. Expands to a scalar <c><i>FfxUInt32</i></c> cast.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector. Expands to a scalar cast only; presumably relies on HLSL scalar-to-vector promotion at the use site - confirm.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector. Expands to a scalar cast only (see FFX_BROADCAST_UINT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector. Expands to a scalar cast only (see FFX_BROADCAST_UINT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 1-dimensional signed integer vector. Expands to a scalar <c><i>FfxInt32</i></c> cast.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_INT32(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector. Expands to a scalar cast only; presumably relies on HLSL scalar-to-vector promotion at the use site - confirm.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector. Expands to a scalar cast only (see FFX_BROADCAST_INT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector. Expands to a scalar cast only (see FFX_BROADCAST_INT32X2).
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. Expands to <c><i>FFX_MIN16_F(a)</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16(a)   FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. Expands to the same scalar <c><i>FFX_MIN16_F(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. Expands to the same scalar <c><i>FFX_MIN16_F(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. Expands to the same scalar <c><i>FFX_MIN16_F(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. Expands to <c><i>FFX_MIN16_U(a)</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_UINT16(a)   FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. Expands to the same scalar <c><i>FFX_MIN16_U(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. Expands to the same scalar <c><i>FFX_MIN16_U(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. Expands to the same scalar <c><i>FFX_MIN16_U(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. Expands to <c><i>FFX_MIN16_I(a)</i></c>.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_INT16(a)   FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. Expands to the same scalar <c><i>FFX_MIN16_I(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. Expands to the same scalar <c><i>FFX_MIN16_I(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. Expands to the same scalar <c><i>FFX_MIN16_I(a)</i></c> as the 1-dimensional form.
+///
+/// @ingroup HLSLCore
+#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)
+
+/// Pack two 32-bit floating point values into a single 32-bit value.
+///
+/// Each component of <c><i>value</i></c> is converted with the <c><i>f32tof16</i></c> intrinsic to a 16-bit
+/// floating point bit pattern; the X result is placed in the lower 16 bits and the Y result is shifted
+/// into the upper 16 bits of the returned unsigned integer.
+///
+/// @param [in] value               A 2-dimensional floating point value to convert and pack.
+///
+/// @returns
+/// A packed 32-bit value containing two 16-bit floating point values (X low, Y high).
+///
+/// @ingroup HLSLCore
+FfxUInt32 packHalf2x16(FfxFloat32x2 value)
+{
+    return f32tof16(value.x) | (f32tof16(value.y) << 16);
+}
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
+{
+    return FfxFloat32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
+{
+    return FfxFloat32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
+{
+    return FfxFloat32x4(value, value, value, value);
+}
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
+{
+    return FfxInt32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component (signed, matching the ffxBroadcast2/ffxBroadcast4 FfxInt32 overloads).
+///
+/// @ingroup HLSLCore
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
+{
+    return FfxInt32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
+{
+    return FfxInt32x4(value, value, value, value);
+}
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
+{
+    return FfxUInt32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
+{
+    return FfxUInt32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
+{
+    return FfxUInt32x4(value, value, value, value);
+}
+
+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)  // returns the `bits`-wide field of src that starts at bit `off`
+{
+    FfxUInt32 shifted = src >> off;        // move the requested field down to bit 0
+    return shifted & ((1u << bits) - 1);   // keep only the low `bits` bits
+}
+
+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)  // takes bits from ins where mask is set, from src elsewhere
+{
+    return (src & ~mask) | (mask & ins);
+}
+
+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)  // replaces the low `bits` bits of src with those of ins
+{
+    FfxUInt32 lowMask = (1u << bits) - 1;            // ones in the low `bits` positions
+    return (src & ~lowMask) | (lowMask & ins);
+}
+
+/// Interprets the bit pattern of a scalar float as an unsigned integer, via the <c><i>asuint</i></c> intrinsic.
+///
+/// @param [in] x               The floating-point value whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as an unsigned integer.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    return asuint(x);
+}
+
+/// Interprets the bit pattern of a 2-component float vector as unsigned integers, via the <c><i>asuint</i></c> intrinsic.
+///
+/// @param [in] x               The floating-point vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 2-component unsigned integer vector.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
+{
+    return asuint(x);
+}
+
+/// Interprets the bit pattern of a 3-component float vector as unsigned integers, via the <c><i>asuint</i></c> intrinsic.
+///
+/// @param [in] x               The floating-point vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 3-component unsigned integer vector.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
+{
+    return asuint(x);
+}
+
+/// Interprets the bit pattern of a 4-component float vector as unsigned integers, via the <c><i>asuint</i></c> intrinsic.
+///
+/// @param [in] x               The floating-point vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 4-component unsigned integer vector.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
+{
+    return asuint(x);
+}
+
+/// Interprets the bit pattern of a scalar unsigned integer as a floating-point number, via the <c><i>asfloat</i></c> intrinsic.
+///
+/// @param [in] x               The unsigned integer whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a floating-point number.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxAsFloat(FfxUInt32 x)
+{
+    return asfloat(x);
+}
+
+/// Interprets the bit pattern of a 2-component unsigned integer vector as floating-point numbers, via the <c><i>asfloat</i></c> intrinsic.
+///
+/// @param [in] x               The unsigned integer vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 2-component floating-point vector.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
+{
+    return asfloat(x);
+}
+
+/// Interprets the bit pattern of a 3-component unsigned integer vector as floating-point numbers, via the <c><i>asfloat</i></c> intrinsic.
+///
+/// @param [in] x               The unsigned integer vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 3-component floating-point vector.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
+{
+    return asfloat(x);
+}
+
+/// Interprets the bit pattern of a 4-component unsigned integer vector as floating-point numbers, via the <c><i>asfloat</i></c> intrinsic.
+///
+/// @param [in] x               The unsigned integer vector whose bits are reinterpreted.
+///
+/// @returns
+/// The input interpreted as a 4-component floating-point vector.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
+{
+    return asfloat(x);
+}
+
+/// Compute the linear interpolation between two scalar values.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 2-component vectors using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 2-component vectors using a per-component weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The vector weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 3-component vectors using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 3-component vectors using a per-component weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The vector weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 4-component vectors using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Compute the linear interpolation between two 4-component vectors using a per-component weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The vector weight determining how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Clamp a scalar value to the [0..1] range via the HLSL <c><i>saturate</i></c> intrinsic.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a 2-component vector to the [0..1] range via the HLSL <c><i>saturate</i></c> intrinsic.
+///
+/// @param [in] x               The vector to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a 3-component vector to the [0..1] range via the HLSL <c><i>saturate</i></c> intrinsic.
+///
+/// @param [in] x               The vector to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a 4-component vector to the [0..1] range via the HLSL <c><i>saturate</i></c> intrinsic.
+///
+/// @param [in] x               The vector to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
+{
+    return saturate(x);
+}
+
+/// Compute the fractional part of a scalar decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE(review): the upstream note named <c><i>V_MAX3_F32</i></c>, which reads like a copy-paste from ffxMax3;
+/// presumably a fract-style instruction was meant - confirm. The function is written as x - floor(x)
+/// rather than forwarding to the HLSL <c><i>frac</i></c> intrinsic.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of each component of a 2-component vector.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE(review): the upstream note named <c><i>V_MAX3_F32</i></c>, which reads like a copy-paste from ffxMax3;
+/// presumably a fract-style instruction was meant - confirm. The function is written as x - floor(x)
+/// rather than forwarding to the HLSL <c><i>frac</i></c> intrinsic.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of each component of a 3-component vector.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE(review): the upstream note named <c><i>V_MAX3_F32</i></c>, which reads like a copy-paste from ffxMax3;
+/// presumably a fract-style instruction was meant - confirm. The function is written as x - floor(x)
+/// rather than forwarding to the HLSL <c><i>frac</i></c> intrinsic.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of each component of a 4-component vector.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>. Where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE(review): the upstream note named <c><i>V_MAX3_F32</i></c>, which reads like a copy-paste from ffxMax3;
+/// presumably a fract-style instruction was meant - confirm. The function is written as x - floor(x)
+/// rather than forwarding to the HLSL <c><i>frac</i></c> intrinsic.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the maximum of three scalar values as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three 2-component vectors as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to <c><i>V_MAX3_F32</i></c> operations on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three 3-component vectors as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to <c><i>V_MAX3_F32</i></c> operations on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The component-wise maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The component-wise maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The component-wise maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The component-wise maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+    // Alternative formulation kept for reference: return min(max(min(y, z), x), max(y, z));
+    // Alternative formulation kept for reference: return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+    // Alternative formulation kept for reference: return min(max(min(y, z), x), max(y, z));
+    // Alternative formulation kept for reference: return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three signed integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The component-wise median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three unsigned integer values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The component-wise minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSLCore
+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+
+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+{
+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); // the shift runs on the signed conversions of a and b; the result is converted back to unsigned
+}
+
+FfxUInt32 ffxPackF32(FfxFloat32x2 v){
+    FfxUInt32x2 halfBits = f32tof16(v); // each component converted to half precision, held in the low 16 bits
+    return (halfBits.y << 16) | halfBits.x; // v.x in bits 0-15, v.y in bits 16-31
+}
+
+FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){
+    return FfxFloat32x2(f16tof32(a & 0xFFFF), f16tof32(a >> 16)); // low 16 bits -> .x, high 16 bits -> .y
+}
+
+//==============================================================================================================================
+//                                                          HLSL HALF
+//==============================================================================================================================
+//==============================================================================================================================
+// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
+FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x)
+{
+	FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16)); // .x decoded from the low 16 bits, .y from the high 16 bits
+	return FFX_MIN16_F2(t);
+}
+FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
+{
+	return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y)); // .xy unpacked from x.x, .zw unpacked from x.y
+}
+FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x)
+{
+	FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16); // .x = low 16 bits of x, .y = high 16 bits of x
+	return FFX_MIN16_U2(t);
+}
+FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x)
+{
+	return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); // .xy split from x.x, .zw split from x.y
+}
+
+/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned.
+/// @param v Value to invert.
+/// @return If v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){
+    FfxFloat32 s = sign(v);
+    FfxFloat32 s2 = s*s; // 1 when sign(v) is non-zero, 0 when it is zero
+    return s2/(v + (s2 - 1.0)); // (s2 - 1) is exactly 0 or -1, so the denominator is v itself or -1; (v + s2) - 1 would round tiny v away
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){
+    FfxFloat32x2 s = sign(v);
+    FfxFloat32x2 s2 = s*s; // per component: 1 when sign(v) is non-zero, 0 when it is zero
+    return s2/(v + (s2 - FfxFloat32x2(1.0, 1.0))); // (s2 - 1) is exactly 0 or -1, so tiny v is not rounded away by an intermediate v + 1
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){
+    FfxFloat32x3 s = sign(v);
+    FfxFloat32x3 s2 = s*s; // per component: 1 when sign(v) is non-zero, 0 when it is zero
+    return s2/(v + (s2 - FfxFloat32x3(1.0, 1.0, 1.0))); // (s2 - 1) is exactly 0 or -1, so tiny v is not rounded away by an intermediate v + 1
+}
+
+/// @brief Inverts each component while avoiding division by zero. Components that are zero yield zero.
+/// @param v Value to invert.
+/// @return Per component: if v = 0 returns 0. If v != 0 returns 1/v.
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){
+    FfxFloat32x4 s = sign(v);
+    FfxFloat32x4 s2 = s*s; // per component: 1 when sign(v) is non-zero, 0 when it is zero
+    return s2/(v + (s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0))); // (s2 - 1) is exactly 0 or -1, so tiny v is not rounded away by an intermediate v + 1
+}
+
+#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x))
+#if FFX_HALF
+
+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
+#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
+#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
+
+FfxUInt32 ffxPackF16(FfxFloat16x2 v){
+    FfxUInt32x2 halfBits = f32tof16(FfxFloat32x2(v)); // widen to 32-bit floats, then take each component's half-precision bits
+    return (halfBits.y << 16) | halfBits.x; // v.x in bits 0-15, v.y in bits 16-31
+}
+
+FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){
+    return FfxFloat16x2(FfxFloat32x2(f16tof32(a & 0xFFFF), f16tof32(a >> 16))); // low 16 bits -> .x, high 16 bits -> .y
+}
+
+//------------------------------------------------------------------------------------------------------------------------------
+FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x)
+{
+	return f32tof16(x.x) + (f32tof16(x.y) << 16); // half bits of x.x in the low 16 bits, half bits of x.y in the high 16 bits
+}
+FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x)
+{
+	return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw)); // .x packs x.xy, .y packs x.zw
+}
+FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x)
+{
+	return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16); // x.x in the low bits, x.y shifted up by 16; NOTE(review): assumes x.x fits in 16 bits, otherwise the + carries into the high half - confirm
+}
+FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x)
+{
+	return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw)); // .x packs x.xy, .y packs x.zw
+}
+#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x))
+#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x))
+#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))
+#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))
+
+#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) // shader model 6.2+ and 16-bit casts not disabled: use the asuint16 intrinsic
+#define FFX_TO_UINT16(x) asuint16(x)
+#define FFX_TO_UINT16X2(x) asuint16(x)
+#define FFX_TO_UINT16X3(x) asuint16(x)
+#define FFX_TO_UINT16X4(x) asuint16(x)
+#else // fallback: convert to a 32-bit float, take its half-precision bits via f32tof16, one component at a time
+#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))
+#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))
+#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))
+#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))
+#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST)
+
+#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) // shader model 6.2+ and 16-bit casts not disabled: use the asfloat16 intrinsic
+#define FFX_TO_FLOAT16(x) asfloat16(x)
+#define FFX_TO_FLOAT16X2(x) asfloat16(x)
+#define FFX_TO_FLOAT16X3(x) asfloat16(x)
+#define FFX_TO_FLOAT16X4(x) asfloat16(x)
+#else // fallback: convert to a 32-bit uint, decode it as half-precision bits via f16tof32, one component at a time
+#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a)))
+#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))
+#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))
+#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))
+#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST)
+
+//==============================================================================================================================
+#define FFX_BROADCAST_FLOAT16(a)   FFX_MIN16_F(a)
+#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a) // NOTE(review): every X2/X3/X4 variant in these three groups expands to the scalar constructor; presumably relies on implicit scalar-to-vector promotion at the use site - confirm
+#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a)
+#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_BROADCAST_INT16(a)   FFX_MIN16_I(a)
+#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a)
+#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a)
+#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_BROADCAST_UINT16(a)   FFX_MIN16_U(a)
+#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a)
+#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a)
+#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)
+
+//==============================================================================================================================
+FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a)
+{
+	return FFX_MIN16_U(abs(FFX_MIN16_I(a))); // abs() is taken on the signed conversion of a, then converted back to unsigned
+}
+FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
+{
+	return FFX_MIN16_U2(abs(FFX_MIN16_I2(a))); // same as the scalar overload, per component
+}
+FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
+{
+	return FFX_MIN16_U3(abs(FFX_MIN16_I3(a)));
+}
+FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
+{
+	return FFX_MIN16_U4(abs(FFX_MIN16_I4(a)));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m)
+{
+	return max(n, min(x, m)); // clamp x to [n, m]: upper bound m is applied first, then lower bound n (so n wins if n > m)
+}
+FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
+{
+	return max(n, min(x, m)); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
+{
+	return max(n, min(x, m));
+}
+FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
+{
+	return max(n, min(x, m));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// V_FRACT_F16 (note DX frac() is different).
+FFX_MIN16_F ffxFract(FFX_MIN16_F x)
+{
+	return x - floor(x); // x minus the largest integer <= x, so e.g. -0.25 maps to 0.75
+}
+FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
+{
+	return x - floor(x); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
+{
+	return x - floor(x);
+}
+FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
+{
+	return x - floor(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a); // forwards to the lerp intrinsic: interpolates from x to y by weight a
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a); // scalar-weight overload: the single weight a is passed for all components
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
+{
+	return lerp(x, y, a); // vector-weight overload: one weight per component
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
+{
+	return lerp(x, y, a);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+	return max(x, max(y, z)); // maximum of three values, built from two nested two-operand max calls
+}
+FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+	return max(x, max(y, z)); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+	return max(x, max(y, z));
+}
+FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+	return max(x, max(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+	return min(x, min(y, z)); // minimum of three values, built from two nested two-operand min calls
+}
+FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+	return min(x, min(y, z)); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+	return min(x, min(y, z));
+}
+FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+	return min(x, min(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z)
+{
+    return max(min(x, y), min(max(x, y), z)); // median of three: the larger of min(x, y) and min(max(x, y), z)
+}
+FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+    return max(min(x, y), min(max(x, y), z)); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z)
+{
+    return max(min(x, y), min(max(x, y), z)); // integer median of three: the larger of min(x, y) and min(max(x, y), z)
+}
+FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
+{
+    return max(min(x, y), min(max(x, y), z)); // same as the scalar overload, per component
+}
+FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x)
+{
+	return rcp(x); // forwards to the rcp intrinsic; no guard against x == 0 is applied here
+}
+FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
+{
+	return rcp(x); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
+{
+	return rcp(x);
+}
+FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
+{
+	return rcp(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x)
+{
+	return rsqrt(x); // forwards to the rsqrt intrinsic; no guard against x <= 0 is applied here
+}
+FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
+{
+	return rsqrt(x); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
+{
+	return rsqrt(x);
+}
+FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
+{
+	return rsqrt(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxSaturate(FFX_MIN16_F x)
+{
+	return saturate(x); // forwards to the saturate intrinsic (clamps to the range 0..1)
+}
+FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
+{
+	return saturate(x); // same as the scalar overload, per component
+}
+FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
+{
+	return saturate(x);
+}
+FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
+{
+	return saturate(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b)
+{
+	return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b)); // the shift runs on the signed conversions of a and b; the result is converted back to unsigned
+}
+FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
+{
+	return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b)); // same as the scalar overload, per component
+}
+FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
+{
+	return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b));
+}
+FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
+{
+	return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b));
+}
+#endif // FFX_HALF
+
+//==============================================================================================================================
+//                                                         HLSL WAVE
+//==============================================================================================================================
+#if defined(FFX_WAVE)
+// Where 'x' must be a compile time literal.
+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // reads v from the lane whose index is this lane's index XOR x
+}
+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // reads v from the lane whose index is this lane's index XOR x
+}
+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // reads v from the lane whose index is this lane's index XOR x
+}
+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // reads v from the lane whose index is this lane's index XOR x
+}
+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // reads v from the lane whose index is this lane's index XOR x
+}
+FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); // NOTE(review): the x2/x3/x4 overloads keep the U1 suffix (the float versions use F2/F3/F4); presumably deliberate overloading - confirm before renaming
+}
+FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxBoolean AWaveIsFirstLane()
+{
+    return WaveIsFirstLane(); // forwards to the WaveIsFirstLane intrinsic
+}
+FfxUInt32 AWaveLaneIndex()
+{
+    return WaveGetLaneIndex(); // forwards to the WaveGetLaneIndex intrinsic
+}
+FfxBoolean AWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, x); // reads v from lane index x (an absolute index, not XOR-ed with the current lane)
+}
+FfxUInt32 AWavePrefixCountBits(FfxBoolean v)
+{
+    return WavePrefixCountBits(v); // forwards to the WavePrefixCountBits intrinsic
+}
+FfxUInt32 AWaveActiveCountBits(FfxBoolean v)
+{
+    return WaveActiveCountBits(v); // forwards to the WaveActiveCountBits intrinsic
+}
+FfxUInt32 AWaveReadLaneFirstU1(FfxUInt32 v)
+{
+    return WaveReadLaneFirst(v); // forwards to the WaveReadLaneFirst intrinsic
+}
+FfxUInt32 WaveOr(FfxUInt32 a)
+{
+    return WaveActiveBitOr(a); // forwards to the WaveActiveBitOr intrinsic
+}
+FfxFloat32 WaveMin(FfxFloat32 a)
+{
+    return WaveActiveMin(a); // forwards to the WaveActiveMin intrinsic
+}
+FfxFloat32 WaveMax(FfxFloat32 a)
+{
+    return WaveActiveMax(a); // forwards to the WaveActiveMax intrinsic
+}
+FfxUInt32 WaveLaneCount()
+{
+    return WaveGetLaneCount(); // forwards to the WaveGetLaneCount intrinsic
+}
+FfxBoolean WaveAllTrue(FfxBoolean v)
+{
+    return WaveActiveAllTrue(v); // forwards to the WaveActiveAllTrue intrinsic
+}
+FfxFloat32 QuadReadX(FfxFloat32 v)
+{
+    return QuadReadAcrossX(v); // forwards to the QuadReadAcrossX intrinsic
+}
+FfxFloat32x2 QuadReadX(FfxFloat32x2 v)
+{
+    return QuadReadAcrossX(v); // two-component overload of the same forwarder
+}
+FfxFloat32 QuadReadY(FfxFloat32 v)
+{
+    return QuadReadAcrossY(v); // forwards to the QuadReadAcrossY intrinsic
+}
+FfxFloat32x2 QuadReadY(FfxFloat32x2 v)
+{
+    return QuadReadAcrossY(v); // two-component overload of the same forwarder
+}
+
+#if FFX_HALF
+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
+{
+    return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); // v is packed into one uint, read from lane (index XOR x), then unpacked
+}
+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
+{
+    return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); // v is packed into two uints, read from lane (index XOR x), then unpacked
+}
+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
+{
+    return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x)); // v is packed into one uint, read from lane (index XOR x), then unpacked
+}
+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
+{
+    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); // v is packed into two uints, read from lane (index XOR x), then unpacked
+}
+#endif // FFX_HALF
+#endif // #if defined(FFX_WAVE)
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta
new file mode 100644
index 00000000..ff56bee5
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_hlsl.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 299f67e8b7e1d1a48a577bf8b328ac92
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h
new file mode 100644
index 00000000..84a62d6b
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h
@@ -0,0 +1,51 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+// Returns a + ffxBroadcast3(b); the sum is also assigned to the parameter d before it is returned.
+FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    return d = a + ffxBroadcast3(b);
+}
+
+// Copy: assigns a to the parameter d and returns the assigned value.
+FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    return d = a;
+}
+
+// Returns the component-wise product a * b; the product is also assigned to the parameter d.
+FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    return d = a * b;
+}
+
+// Returns a * ffxBroadcast3(b); the product is also assigned to the parameter d before it is returned.
+FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    return d = a * ffxBroadcast3(b);
+}
+
+// Returns rcp(a); the result is also assigned to the parameter d before it is returned.
+FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    return d = rcp(a);
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta
new file mode 100644
index 00000000..25ff64b3
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_core_portability.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 8d2ace0bd52e0e1438e08ddaccd3ba24
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h
new file mode 100644
index 00000000..c425de79
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h
@@ -0,0 +1,288 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_ACCUMULATE_H
+#define FFX_FSR3UPSCALER_ACCUMULATE_H
+
+FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) // Length of the motion vector after multiplying it by DisplaySize().
+{
+    return length(fMotionVector * DisplaySize());
+}
+#if FFX_HALF
+FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) // FFX_MIN16_F2 overload: same computation, with DisplaySize() converted to FFX_MIN16_F2 first.
+{
+    return length(fMotionVector * FFX_MIN16_F2(DisplaySize()));
+}
+#endif
+
+void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
+{
+    // Blends the upsampled sample into fHistoryColor. The blend happens in YCoCg; fHistoryColor is written back as RGB.
+    // Add this frame's weight to the accumulation; the epsilon floor avoids invalid values when both are 0.
+    fAccumulation = ffxMax(FSR3UPSCALER_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
+    const FfxFloat32x3 fBlendWeight = fUpsampledColorAndWeight.www / fAccumulation;
+
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+    // HDR input: YCoCg -> RGB -> Tonemap -> YCoCg (an RGB tonemapper is used to avoid color desaturation).
+    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
+    fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
+#endif
+
+    const FfxFloat32x3 fBlendedYCoCg = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fBlendWeight);
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+    fHistoryColor = InverseTonemap(YCoCgToRGB(fBlendedYCoCg));
+#else
+    fHistoryColor = YCoCgToRGB(fBlendedYCoCg);
+#endif
+}
+
+void RectifyHistory(
+    const AccumulationPassCommonParams params,
+    RectificationBox clippingBox,
+    FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,
+    FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,
+    FfxFloat32 fLockContributionThisFrame,
+    FfxFloat32 fTemporalReactiveFactor,
+    FfxFloat32 fLumaInstabilityFactor)
+{
+    // Pulls fHistoryColor toward the (scaled) clipping box when it lies outside it, and reduces fAccumulation to match.
+    // fTemporalReactiveFactor is not read by this function.
+
+    // The box is scaled by 3 when depth clip, accumulation mask and velocity factor are all 0, shrinking to 1 as any of them reaches 1.
+    const FfxFloat32 fVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
+    const FfxFloat32 fScaleSelector = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVelocityFactor));
+    const FfxFloat32 fScale = ffxLerp(3.0f, 1.0f, fScaleSelector);
+    const FfxFloat32x3 fHalfExtent = clippingBox.boxVec * fScale;
+    const FfxFloat32x3 fBoxLower = clippingBox.boxCenter - fHalfExtent;
+    const FfxFloat32x3 fBoxUpper = clippingBox.boxCenter + fHalfExtent;
+
+    // History already inside the box on every channel: nothing to rectify.
+    if (!any(FFX_GREATER_THAN(fBoxLower, fHistoryColor)) && !any(FFX_GREATER_THAN(fHistoryColor, fBoxUpper))) {
+        return;
+    }
+
+    // Share of the unclamped history to keep: the larger of luma instability and lock contribution, reduced by the dilated reactive factor.
+    const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(params.fDilatedReactiveFactor, 1.0f / 2.0f);
+    FfxFloat32x3 fKeepHistory = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
+    fKeepHistory *= fReactiveContribution;
+    fKeepHistory = ffxSaturate(fKeepHistory);
+
+    // Blend clamped -> original history color, and min(accumulation, 0.1) -> original accumulation, by that share.
+    fHistoryColor = ffxLerp(clamp(fHistoryColor, fBoxLower, fBoxUpper), fHistoryColor, fKeepHistory);
+    fAccumulation = ffxLerp(ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f)), fAccumulation, fKeepHistory);
+}
+
+void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) // Forwards the color for pixel iPxHrPos to StoreUpscaledOutput().
+{
+    StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
+}
+
+void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
+{
+    // Similar motion is expected for the next frame, so estimate the UV this pixel maps to then.
+    // If that UV is outside the screen the lock is killed, which avoids locks being clamped to the screen borders.
+    const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector;
+    if (IsUvInside(fEstimatedUvNextFrame)) {
+        // Decrease the remaining lifetime by the upsampled weight relative to (jitter sequence length * average Lanczos weight), floored at 0.
+        const FfxFloat32 fLanczosBudget = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
+        const FfxFloat32 fRemaining = fLockStatus[LOCK_LIFETIME_REMAINING] - FfxFloat32(fUpsampledWeight / fLanczosBudget);
+        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fRemaining);
+    }
+    else {
+        KillLock(fLockStatus);
+    }
+
+    StoreLockStatus(params.iPxHrPos, fLockStatus);
+}
+
+
+FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
+{
+    // Assume max accumulation was reached; zero for non-existing samples, reduced by reactivity and depth clip. lockState is not read.
+    FfxFloat32 fWeight = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
+    // First cap (can only lower the weight): toward 10x the upsampled weight when in motion last frame or as velocity * 10 saturates.
+    const FfxFloat32 fMotionBlend = ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)));
+    fWeight = ffxMin(fWeight, ffxLerp(fWeight, fUpsampledWeight * 10.0f, fMotionBlend));
+    // Second cap: toward the upsampled weight itself as velocity / 20 saturates.
+    fWeight = ffxMin(fWeight, ffxLerp(fWeight, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20))));
+    return fWeight.xxx;
+}
+
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
+{ // Returns a luma instability factor for this pixel and stores the pixel's updated four-entry luma history.
+    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f; // Same step size as the 1/255 quantization applied to the luma below.
+    const FfxInt32 N_MINUS_1 = 0; // History channel indices: [N_MINUS_1] is the most recently stored luma,
+    const FfxInt32 N_MINUS_2 = 1; // [N_MINUS_4] the oldest (see the shift at the end of this function).
+    const FfxInt32 N_MINUS_3 = 2;
+    const FfxInt32 N_MINUS_4 = 3;
+    // The current frame's luma is read from the x channel of the clipping box center.
+    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;
+
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
+#endif
+    // Quantize to multiples of 1/255.
+    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;
+    // History is sampled only when depth clip, accumulation mask and luminance difference are all below 0.1 and the sample is not new; otherwise it starts as all zeros.
+    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
+    FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);
+
+    FfxFloat32 fLumaInstability = 0.0f;
+    FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
+
+    FfxFloat32 fMin = abs(fDiffs0);
+    // Only look further back when the luma differs from N-1 by at least one quantization step.
+    if (fMin >= fUnormThreshold) {
+        for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
+            FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
+            // Only older entries that differ from the current luma in the same direction as N-1 are considered.
+            if (sign(fDiffs0) == sign(fDiffs1)) {
+                // fMin ends up as the smallest absolute difference among N-1 and those entries (fMinBias is currently 1).
+                // Scale difference to protect historically similar values
+                const FfxFloat32 fMinBias = 1.0f;
+                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
+            }
+        }
+
+        const FfxFloat32 fBoxSize       = clippingBox.boxVec.x;
+        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
+        // Flag instability (0 or 1) when an older entry was closer than N-1 and the box size factor exceeds the threshold.
+        fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
+        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);
+        // Attenuate by the accumulation mask or the sixth root of the reactive factor, whichever is larger.
+        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
+    }
+
+    //shift history
+    fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
+    fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
+    fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
+    fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
+
+    StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory);
+    // The factor is forced to 0 while the oldest slot of the shifted history is still 0.
+    return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
+}
+
+FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
+{
+    // Cap the incoming factor at 0.99, then let velocity raise it toward 0.4 (it is never lowered by this step).
+    const FfxFloat32 fCapped = ffxMin(0.99f, fTemporalReactiveFactor);
+    const FfxFloat32 fVelocityRaised = ffxMax(fCapped, ffxLerp(fCapped, 0.4f, ffxSaturate(params.fHrVelocity)));
+
+    // Square it, but keep at least the dilated reactive factor and a tenth of the depth clip factor.
+    const FfxFloat32 fFloor = ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor);
+    const FfxFloat32 fSquared = ffxMax(fVelocityRaised * fVelocityRaised, fFloor);
+
+    // Force reactive factor for new samples.
+    const FfxFloat32 fResult = params.bIsNewSample ? 1.0f : fSquared;
+
+    // Once velocity * 10 saturates, the factor is returned negated with its magnitude floored at FSR3UPSCALER_EPSILON.
+    const FfxBoolean bVelocitySaturated = ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f;
+    return bVelocitySaturated ? ffxMax(FSR3UPSCALER_EPSILON, fResult) * -1.0f : fResult;
+}
+
+AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) // Fills the parameter struct the accumulation helpers share for pixel iPxHrPos.
+{
+    AccumulationPassCommonParams params;
+    // UV of the pixel center: (position + 0.5) divided by DisplaySize().
+    params.iPxHrPos = iPxHrPos;
+    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
+    params.fHrUv = fHrUv;
+    // Sampling UV: offset by Jitter() / RenderSize(), then passed through ClampUv() with the current and maximum render sizes.
+    const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
+    params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
+
+    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
+    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
+    // params is passed in partially filled; fReprojectedHrUv and bIsExistingSample are presumably out parameters (declared elsewhere - TODO confirm).
+    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);
+
+    params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));
+    // x holds the dilated reactive factor, y the accumulation mask.
+    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
+    params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
+    params.fAccumulationMask = fDilatedReactiveMasks.y;
+    params.bIsResetFrame = (0 == FrameIndex());
+    // A sample counts as new when it is not an existing sample or when this is a reset frame (frame index 0).
+    params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame);
+
+    return params;
+}
+
+void Accumulate(FfxInt32x2 iPxHrPos) // Accumulation entry point for one output pixel: reprojects history, blends in the new sample and stores the results.
+{
+    const AccumulationPassCommonParams params = InitParams(iPxHrPos);
+    // Defaults used when no history is reprojected: black history color and a newly initialized lock sample.
+    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
+    FfxFloat32x2 fLockStatus;
+    InitializeNewLockSample(fLockStatus);
+    // History color and lock status are only reprojected for existing samples on non-reset frames.
+    FfxFloat32 fTemporalReactiveFactor = 0.0f;
+    FfxBoolean bInMotionLastFrame = FFX_FALSE;
+    LockState lockState = { FFX_FALSE , FFX_FALSE };
+    if (params.bIsExistingSample && !params.bIsResetFrame) {
+        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
+        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
+    }
+    // This frame's reactive factor is the larger of the dilated mask value and the reprojected temporal factor.
+    FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
+
+    FfxFloat32 fLuminanceDiff = 0.0f;
+    FfxFloat32 fLockContributionThisFrame = 0.0f;
+    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);
+
+    // Load upsampled input color
+    RectificationBox clippingBox;
+    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
+    // This call also stores the pixel's updated luma history.
+    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);
+
+
+    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
+    // New samples take the upsampled color directly (converted from YCoCg to RGB); otherwise history is rectified and then blended.
+    if (params.bIsNewSample) {
+        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
+    }
+    else {
+        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);
+
+        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
+    }
+
+    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
+
+    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);
+
+    // Get new temporal reactive factor
+    fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);
+    // The temporal reactive factor is stored in the w channel alongside the color.
+    StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));
+
+    // Output final color when RCAS is disabled
+#if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0
+    WriteUpscaledOutput(iPxHrPos, fHistoryColor);
+#endif
+    StoreNewLocks(iPxHrPos, 0);
+}
+
+#endif // FFX_FSR3UPSCALER_ACCUMULATE_H
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta
new file mode 100644
index 00000000..08c98e7c
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 3fc2f7a2c8c31324a949e1761bf599cc
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h
new file mode 100644
index 00000000..13b317ab
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h
@@ -0,0 +1,928 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#include "ffx_fsr3upscaler_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // FFX_PREFER_WAVE64
+
+#if defined(FFX_GPU)
+#pragma warning(disable: 3205)  // conversion from larger type to smaller
+#endif // #if defined(FFX_GPU)
+
+#define DECLARE_SRV_REGISTER(regIndex)  t##regIndex  // Pastes the index onto the register prefix, e.g. 3 -> t3.
+#define DECLARE_UAV_REGISTER(regIndex)  u##regIndex  // e.g. 3 -> u3.
+#define DECLARE_CB_REGISTER(regIndex)   b##regIndex  // e.g. 3 -> b3.
+#define FFX_FSR3UPSCALER_DECLARE_SRV(regIndex)  register(DECLARE_SRV_REGISTER(regIndex))  // The extra macro level lets regIndex itself be a macro that expands before pasting.
+#define FFX_FSR3UPSCALER_DECLARE_UAV(regIndex)  register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_FSR3UPSCALER_DECLARE_CB(regIndex)   register(DECLARE_CB_REGISTER(regIndex))
+
+#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+    cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) // Declared only when the including shader defines the register index macro.
+    {
+        FfxInt32x2    iRenderSize;
+        FfxInt32x2    iMaxRenderSize;
+        FfxInt32x2    iDisplaySize;
+        FfxInt32x2    iInputColorResourceDimensions;
+        FfxInt32x2    iLumaMipDimensions;
+        FfxInt32      iLumaMipLevelToUse;
+        FfxInt32      iFrameIndex; // 0 is treated as a reset frame by InitParams() in ffx_fsr3upscaler_accumulate.h.
+
+        FfxFloat32x4  fDeviceToViewDepth;
+        FfxFloat32x2  fJitter;
+        FfxFloat32x2  fMotionVectorScale;
+        FfxFloat32x2  fDownscaleFactor;
+        FfxFloat32x2  fMotionVectorJitterCancellation;
+        FfxFloat32    fPreExposure;
+        FfxFloat32    fPreviousFramePreExposure;
+        FfxFloat32    fTanHalfFOV;
+        FfxFloat32    fJitterSequenceLength; // Stored as a float even though it is a length.
+        FfxFloat32    fDeltaTime;
+        FfxFloat32    fDynamicResChangeFactor;
+        FfxFloat32    fViewSpaceToMetersFactor;
+
+        FfxInt32      iDummy; // No getter in this section reads this field; presumably padding - TODO confirm.
+    };
+
+#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4)  // Number of 32-bit values. This must be kept in sync with the cbFSR3Upscaler size.
+
+/* Getters for the cbFSR3Upscaler fields; keep them in CB declaration order. Note: Jitter() and DeviceToViewSpaceTransformFactors() are currently swapped relative to the CB. */
+FfxInt32x2 RenderSize()
+{
+    return iRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+    return iMaxRenderSize;
+}
+
+FfxInt32x2 DisplaySize()
+{
+    return iDisplaySize;
+}
+
+FfxInt32x2 InputColorResourceDimensions()
+{
+    return iInputColorResourceDimensions;
+}
+
+FfxInt32x2 LumaMipDimensions()
+{
+    return iLumaMipDimensions;
+}
+
+FfxInt32  LumaMipLevelToUse()
+{
+    return iLumaMipLevelToUse;
+}
+
+FfxInt32 FrameIndex()
+{
+    return iFrameIndex;
+}
+
+FfxFloat32x2 Jitter()
+{
+    return fJitter;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+    return fDeviceToViewDepth;
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+    return fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+    return fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+    return fMotionVectorJitterCancellation;
+}
+
+FfxFloat32 PreExposure()
+{
+    return fPreExposure;
+}
+
+FfxFloat32 PreviousFramePreExposure()
+{
+    return fPreviousFramePreExposure;
+}
+
+FfxFloat32 TanHalfFoV()
+{
+    return fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+    return fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime()
+{
+    return fDeltaTime;
+}
+
+FfxFloat32 DynamicResChangeFactor()
+{
+    return fDynamicResChangeFactor;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+    return fViewSpaceToMetersFactor;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER)
+
+#define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p)  // Two-level stringification: the argument is macro-expanded before # is applied.
+#define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p
+#define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE 6  // Number of 32-bit values. Must be kept in sync with max(cbRCAS, cbSPD) size: cbRCAS declares 4 values, cbSPD declares 6.
+// Same layout as FFX_FSR3UPSCALER_ROOTSIG, plus a second RootConstants range at b1 sized by FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE.
+#define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE) ", b1), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+#if defined(FFX_FSR3UPSCALER_EMBED_ROOTSIG)
+#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT FFX_FSR3UPSCALER_ROOTSIG
+#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR3UPSCALER_CB2_ROOTSIG
+#else
+#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT // Expands to nothing when root signatures are not embedded.
+#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT
+#endif // #if FFX_FSR3UPSCALER_EMBED_ROOTSIG
+
+#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) // NOTE: the FSR3UPSCALER_BIND_CB_REACTIVE block below reuses this cbuffer name; defining both macros would declare it twice.
+{
+    FfxFloat32   fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
+    FfxFloat32   fTcScale;
+    FfxFloat32   fReactiveScale;
+    FfxFloat32   fReactiveMax;
+};
+
+FfxFloat32 TcThreshold()
+{
+    return fTcThreshold;
+}
+
+FfxFloat32 TcScale()
+{
+    return fTcScale;
+}
+
+FfxFloat32 ReactiveScale()
+{
+    return fReactiveScale;
+}
+
+FfxFloat32 ReactiveMax()
+{
+    return fReactiveMax;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE)
+
+#if defined(FSR3UPSCALER_BIND_CB_RCAS)
+cbuffer cbRCAS : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_RCAS) // Four 32-bit values; counted by FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE above.
+{
+    FfxUInt32x4 rcasConfig;
+};
+
+FfxUInt32x4 RCASConfig()
+{
+    return rcasConfig;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS)
+
+
+#if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_REACTIVE) // NOTE: same cbuffer name as the FSR3UPSCALER_BIND_CB_AUTOREACTIVE block above; only one of the two macros can be defined per shader.
+{
+    FfxFloat32   gen_reactive_scale;
+    FfxFloat32   gen_reactive_threshold;
+    FfxFloat32   gen_reactive_binaryValue;
+    FfxUInt32    gen_reactive_flags;
+};
+
+FfxFloat32 GenReactiveScale()
+{
+    return gen_reactive_scale;
+}
+
+FfxFloat32 GenReactiveThreshold()
+{
+    return gen_reactive_threshold;
+}
+
+FfxFloat32 GenReactiveBinaryValue()
+{
+    return gen_reactive_binaryValue;
+}
+
+FfxUInt32 GenReactiveFlags()
+{
+    return gen_reactive_flags;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE)
+
+#if defined(FSR3UPSCALER_BIND_CB_SPD)
+cbuffer cbSPD : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_SPD) {
+    // Six 32-bit values in total; FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE above must cover this size.
+    FfxUInt32   mips;
+    FfxUInt32   numWorkGroups;
+    FfxUInt32x2 workGroupOffset;
+    FfxUInt32x2 renderSize;
+};
+
+FfxUInt32 MipCount()
+{
+    return mips;
+}
+
+FfxUInt32 NumWorkGroups()
+{
+    return numWorkGroups;
+}
+
+FfxUInt32x2 WorkGroupOffset()
+{
+    return workGroupOffset;
+}
+
+FfxUInt32x2 SPD_RenderSize() // Returns cbSPD's own renderSize field, distinct from iRenderSize in cbFSR3Upscaler.
+{
+    return renderSize;
+}
+#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD)
+
+// Declare and sample camera buffers as regular textures, unless overridden by defining these macros before this header is included
+#if !defined(UNITY_FSR3_TEX2D)
+#define UNITY_FSR3_TEX2D(type)      Texture2D<type>
+#endif
+#if !defined(UNITY_FSR3_RWTEX2D)
+#define UNITY_FSR3_RWTEX2D(type)    RWTexture2D<type>
+#endif
+#if !defined(UNITY_FSR3_POS)
+#define UNITY_FSR3_POS(pxPos)       (pxPos)  // Default: pixel position is used unchanged.
+#endif
+#if !defined(UNITY_FSR3_UV)
+#define UNITY_FSR3_UV(uv)           (uv)  // Default: UV is used unchanged.
+#endif
+
+SamplerState s_PointClamp : register(s0);  // s0 is the point-filtered clamp sampler in the root signature macros above.
+SamplerState s_LinearClamp : register(s1);  // s1 is the linear-filtered clamp sampler in the root signature macros above.
+
+    // SRVs
+    #if defined FSR3UPSCALER_BIND_SRV_INPUT_COLOR
+        UNITY_FSR3_TEX2D(FfxFloat32x4)            r_input_color_jittered                    : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY
+        UNITY_FSR3_TEX2D(FfxFloat32x4)            r_input_opaque_only                       : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS
+        UNITY_FSR3_TEX2D(FfxFloat32x4)            r_input_motion_vectors                    : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_INPUT_DEPTH
+        UNITY_FSR3_TEX2D(FfxFloat32)              r_input_depth                             : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH);
+    #endif 
+    #if defined FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE
+        Texture2D<FfxFloat32x2>                   r_input_exposure                          : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE
+        Texture2D<FfxFloat32x2>                   r_auto_exposure                           : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_REACTIVE_MASK
+        UNITY_FSR3_TEX2D(FfxFloat32)              r_reactive_mask                           : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK);
+    #endif 
+    #if defined FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
+        UNITY_FSR3_TEX2D(FfxFloat32)              r_transparency_and_composition_mask       : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+        Texture2D<FfxUInt32>                      r_reconstructed_previous_nearest_depth    : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+    #endif 
+    #if defined FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS
+       Texture2D<FfxFloat32x2>                    r_dilated_motion_vectors                  : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
+           Texture2D<FfxFloat32x2>                r_previous_dilated_motion_vectors         : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_DILATED_DEPTH
+        Texture2D<FfxFloat32>                     r_dilated_depth                           : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED
+        Texture2D<FfxFloat32x4>                   r_internal_upscaled_color                 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_LOCK_STATUS
+        Texture2D<unorm FfxFloat32x2>             r_lock_status                             : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_STATUS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA
+        Texture2D<FfxFloat32>                     r_lock_input_luma                         : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_NEW_LOCKS
+        Texture2D<unorm FfxFloat32>               r_new_locks                               : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR
+        Texture2D<FfxFloat32x4>                  r_prepared_input_color                    : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_LUMA_HISTORY
+        Texture2D<unorm FfxFloat32x4>             r_luma_history                            : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_RCAS_INPUT
+        Texture2D<FfxFloat32x4>                   r_rcas_input                              : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_LANCZOS_LUT
+        Texture2D<FfxFloat32>                     r_lanczos_lut                             : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS
+        Texture2D<FfxFloat32>                     r_imgMips                                 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
+        Texture2D<FfxFloat32>                     r_upsample_maximum_bias_lut               : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS
+        Texture2D<unorm FfxFloat32x2>             r_dilated_reactive_masks                  : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS);
+    #endif
+
+    #if defined FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR  // slot comes from the bind macro tested here, like every other SRV in this list
+        Texture2D<float3>                         r_input_prev_color_pre_alpha              : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR);
+    #endif
+    #if defined FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR // (not from the FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_* constant, which is not a register slot)
+        Texture2D<float3>                         r_input_prev_color_post_alpha             : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR);
+    #endif
+   
+    // UAV declarations
+    #if defined FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+        RWTexture2D<FfxUInt32>                    rw_reconstructed_previous_nearest_depth   : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS
+        RWTexture2D<FfxFloat32x2>                 rw_dilated_motion_vectors                 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_DILATED_DEPTH
+        RWTexture2D<FfxFloat32>                   rw_dilated_depth                          : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED
+        RWTexture2D<FfxFloat32x4>                 rw_internal_upscaled_color                : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_LOCK_STATUS
+        RWTexture2D<unorm FfxFloat32x2>           rw_lock_status                            : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_STATUS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA
+        RWTexture2D<FfxFloat32>                   rw_lock_input_luma                        : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_NEW_LOCKS
+        RWTexture2D<unorm FfxFloat32>             rw_new_locks                              : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR
+        RWTexture2D<FfxFloat32x4>                 rw_prepared_input_color                   : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_LUMA_HISTORY
+        RWTexture2D<FfxFloat32x4>                 rw_luma_history                           : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT
+        UNITY_FSR3_RWTEX2D(FfxFloat32x4)          rw_upscaled_output                        : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_shading_change                 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5
+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_5                              : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS
+        RWTexture2D<unorm FfxFloat32x2>           rw_dilated_reactive_masks                 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE
+        RWTexture2D<FfxFloat32x2>                 rw_exposure                               : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_EXPOSURE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE
+        RWTexture2D<FfxFloat32x2>                 rw_auto_exposure                          : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+        globallycoherent RWTexture2D<FfxUInt32>   rw_spd_global_atomic                      : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC);
+    #endif
+
+    #if defined FSR3UPSCALER_BIND_UAV_AUTOREACTIVE
+        RWTexture2D<float>                        rw_output_autoreactive                    : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION
+        RWTexture2D<float>                        rw_output_autocomposition                 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR
+        RWTexture2D<float3>                       rw_output_prev_color_pre_alpha            : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR);
+    #endif
+    #if defined FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR
+        RWTexture2D<float3>                       rw_output_prev_color_post_alpha           : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR);
+    #endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS)
+// Fetches one texel of the scene-luminance mip chain (r_imgMips) at iPxPos on mip mipLevel.
+FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
+{
+    return r_imgMips.mips[mipLevel][iPxPos];
+}
+
+// Samples r_imgMips at fUV on mip mipLevel using the s_LinearClamp sampler.
+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
+{
+    return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel);
+}
+#endif // FSR3UPSCALER_BIND_SRV_SCENE_LUMINANCE_MIPS
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH)
+// Fetches the input depth texel at iPxPos; the coordinate is first passed through UNITY_FSR3_POS.
+FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
+{
+    return r_input_depth[UNITY_FSR3_POS(iPxPos)];
+}
+
+// Samples input depth at fUV (passed through UNITY_FSR3_UV) with s_LinearClamp on mip 0; returns the x channel.
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
+{
+    return r_input_depth.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).x;
+}
+#endif // FSR3UPSCALER_BIND_SRV_INPUT_DEPTH
+
+#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK)
+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) // Texel fetch from r_reactive_mask.
+{
+    return r_reactive_mask[UNITY_FSR3_POS(iPxPos)]; // coordinate goes through UNITY_FSR3_POS (defined outside this chunk)
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) // Texel fetch from r_transparency_and_composition_mask.
+{
+    return r_transparency_and_composition_mask[UNITY_FSR3_POS(iPxPos)]; // coordinate goes through UNITY_FSR3_POS (defined outside this chunk)
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR)
+// Fetches the RGB of the jittered input color texel at iPxPos (coordinate passed through UNITY_FSR3_POS).
+FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
+{
+    return r_input_color_jittered[UNITY_FSR3_POS(iPxPos)].rgb;
+}
+
+// Samples the RGB of the jittered input color at fUV (passed through UNITY_FSR3_UV) with s_LinearClamp on mip 0.
+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
+{
+    return r_input_color_jittered.SampleLevel(s_LinearClamp, UNITY_FSR3_UV(fUV), 0).rgb;
+}
+#endif // FSR3UPSCALER_BIND_SRV_INPUT_COLOR
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR)
+FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) // Texel fetch from r_prepared_input_color.
+{
+    return r_prepared_input_color[iPxPos].xyz; // w is not returned here; SampleDepthClip reads that channel
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS)
+// Reads the xy motion vector at the given pixel and multiplies it by MotionVectorScale().
+// With FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS enabled, MotionVectorJitterCancellation() is subtracted.
+FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
+{
+    const FfxFloat32x2 fRawVector = r_input_motion_vectors[UNITY_FSR3_POS(iPxDilatedMotionVectorPos)].xy;
+    const FfxFloat32x2 fScaledVector = fRawVector * MotionVectorScale();
+#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS
+    return fScaledVector - MotionVectorJitterCancellation();
+#else
+    return fScaledVector;
+#endif
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED)
+FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) // Texel fetch of all four channels from r_internal_upscaled_color.
+{
+    return r_internal_upscaled_color[iPxHistory];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY)
+void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) // Writes fLumaHistory to the rw_luma_history UAV at iPxPos.
+{
+    rw_luma_history[iPxPos] = fLumaHistory;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY)
+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) // Samples r_luma_history at fUV.
+{
+    return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); // s_LinearClamp sampler, mip 0
+}
+#endif
+
+FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) // RCAS source texel at iPxPos; all zeros when the RCAS input SRV is not bound
+{
+#if !defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT)
+    return 0.0;
+#else
+    return r_rcas_input[iPxPos];
+#endif // FSR3UPSCALER_BIND_SRV_RCAS_INPUT
+}
+
+#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED)
+// Writes fHistory to the internal upscaled-color UAV at iPxHistory.
+void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+    rw_internal_upscaled_color[iPxHistory] = fHistory;
+}
+
+// Writes fColorAndWeight to the same internal upscaled-color UAV at iPxPos.
+void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+    rw_internal_upscaled_color[iPxPos] = fColorAndWeight;
+}
+#endif // FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED
+
+#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT)
+void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) // Writes the final color to rw_upscaled_output.
+{
+    rw_upscaled_output[UNITY_FSR3_POS(iPxPos)] = FfxFloat32x4(fColor, 1.f); // alpha is always written as 1; coordinate goes through UNITY_FSR3_POS
+}
+#endif
+
+//LOCK_LIFETIME_REMAINING == 0
+//Should make LockInitialLifetime() return a const 1.0f later
+#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS)
+FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) // Texel fetch of the two-channel lock status from r_lock_status.
+{
+    return r_lock_status[iPxPos];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_LOCK_STATUS)
+void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) // Writes fLockStatus to the rw_lock_status UAV at iPxPos.
+{
+    rw_lock_status[iPxPos] = fLockStatus; // the UAV is declared unorm, so the value is stored in normalized form
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_LOCK_INPUT_LUMA)
+FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos) // Texel fetch from r_lock_input_luma.
+{
+    return r_lock_input_luma[iPxPos];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_LOCK_INPUT_LUMA)
+void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) // Writes fLuma to the rw_lock_input_luma UAV at iPxPos.
+{
+    rw_lock_input_luma[iPxPos] = fLuma;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS)
+FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) // Texel fetch from the read-only r_new_locks SRV.
+{
+    return r_new_locks[iPxPos];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS)
+// Reads the new-locks value at iPxPos through the UAV binding (rw_new_locks) instead of the SRV.
+FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
+{
+    return rw_new_locks[iPxPos];
+}
+
+// Writes newLock to the rw_new_locks UAV at iPxPos.
+void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
+{
+    rw_new_locks[iPxPos] = newLock;
+}
+#endif // FSR3UPSCALER_BIND_UAV_NEW_LOCKS
+
+#if defined(FSR3UPSCALER_BIND_UAV_PREPARED_INPUT_COLOR)
+void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) // Writes fTonemapped to rw_prepared_input_color at iPxPos.
+{
+    rw_prepared_input_color[iPxPos] = fTonemapped;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREPARED_INPUT_COLOR)
+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) // Samples the w channel of r_prepared_input_color at fUV.
+{
+    return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; // s_LinearClamp sampler, mip 0
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_LOCK_STATUS)
+// Samples the two-channel lock status (r_lock_status) at fUV with s_LinearClamp on mip 0.
+FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
+{
+    return r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) // Reads the reconstructed previous depth at iPxPos as a float.
+{
+    return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); // the texture holds uint bit patterns; asfloat reinterprets them, no numeric conversion
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+// Atomically merges fDepth (reinterpreted as its raw 32-bit pattern) into the reconstructed previous-depth UAV:
+// InterlockedMax when FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH is set, InterlockedMin for standard depth.
+// NOTE(review): comparing raw bit patterns only matches float ordering for non-negative depths - presumably guaranteed by callers.
+void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
+{
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+    InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], asuint(fDepth));
+#else
+    InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], asuint(fDepth));
+#endif
+}
+
+// Overwrites the reconstructed previous-depth texel at iPxSample with uValue (plain store, not atomic).
+void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
+{
+    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue;
+}
+#endif // FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH)
+void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) // Writes fDepth to the rw_dilated_depth UAV at iPxPos.
+{
+    rw_dilated_depth[iPxPos] = fDepth;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS)
+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) // Writes fMotionVector to rw_dilated_motion_vectors at iPxPos.
+{
+    rw_dilated_motion_vectors[iPxPos] = fMotionVector;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS)
+FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) // Texel fetch from r_dilated_motion_vectors.
+{
+    return r_dilated_motion_vectors[iPxInput].xy;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
+FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) // Texel fetch from r_previous_dilated_motion_vectors.
+{
+    return r_previous_dilated_motion_vectors[iPxInput].xy;
+}
+
+FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) // Filtered read of the same texture at uv.
+{
+    return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; // s_LinearClamp sampler, mip 0
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH)
+FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) // Texel fetch from r_dilated_depth.
+{
+    return r_dilated_depth[iPxInput];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE)
+// Returns the x channel of texel (0, 0) of r_input_exposure.
+// Only an exact zero is substituted (by 1.0); every other stored value is returned as-is.
+// NOTE(review): presumably this keeps later multiplies/divides by exposure well-defined - confirm with callers.
+FfxFloat32 Exposure()
+{
+    const FfxFloat32 fStored = r_input_exposure[FfxUInt32x2(0, 0)].x;
+
+    // A zero exposure is never handed back to the caller.
+    return (fStored == 0.0f) ? 1.0f : fStored;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_AUTO_EXPOSURE)
+// Returns the x channel of texel (0, 0) of r_auto_exposure.
+// Only an exact zero is substituted (by 1.0); every other stored value is returned as-is.
+// NOTE(review): presumably this keeps later multiplies/divides by exposure well-defined - confirm with callers.
+FfxFloat32 AutoExposure()
+{
+    const FfxFloat32 fStored = r_auto_exposure[FfxUInt32x2(0, 0)].x;
+
+    // A zero exposure is never handed back to the caller.
+    return (fStored == 0.0f) ? 1.0f : fStored;
+}
+#endif
+
+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) // LUT lookup at (u, v) = (x / 2, 0.5); returns 0 when the Lanczos LUT SRV is not bound
+{
+#if !defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT)
+    return 0.f;
+#else
+    return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0);
+#endif // FSR3UPSCALER_BIND_SRV_LANCZOS_LUT
+}
+
+#if defined(FSR3UPSCALER_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) // Looks up the maximum-bias LUT at abs(uv) * 2 and doubles the result.
+{
+    // The LUT is stored as SNORM, so the sampled value is multiplied by 2 to recover the expected range.
+    return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0);
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS)
+// Samples the two-channel dilated reactive masks at fUV with s_LinearClamp on mip 0.
+FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
+{
+    return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0);
+}
+
+// Fetches the dilated reactive masks texel at iPxPos without filtering.
+FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
+{
+    return r_dilated_reactive_masks[iPxPos];
+}
+#endif // FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS
+
+#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS)
+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) // Writes both mask channels to rw_dilated_reactive_masks.
+{
+    rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY)
+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Texel fetch of the xyz channels from r_input_opaque_only.
+{
+    return r_input_opaque_only[UNITY_FSR3_POS(iPxPos)].xyz; // coordinate goes through UNITY_FSR3_POS (defined outside this chunk)
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR)
+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Texel fetch from r_input_prev_color_pre_alpha.
+{
+    return r_input_prev_color_pre_alpha[iPxPos];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR)
+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) // Texel fetch from r_input_prev_color_post_alpha.
+{
+    return r_input_prev_color_post_alpha[iPxPos];
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION)
+// Writes the generated masks for one pixel: fReactive.x goes to rw_output_autoreactive and
+// fReactive.y to rw_output_autocomposition. Compiled only when both UAVs are bound.
+void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
+{
+    rw_output_autoreactive[iPxPos] = fReactive.x;
+
+    rw_output_autocomposition[iPxPos] = fReactive.y;
+}
+#endif // FSR3UPSCALER_BIND_UAV_AUTOREACTIVE && FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION
+
+#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR)
+// Writes color to the rw_output_prev_color_pre_alpha UAV at iPxPos.
+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+    rw_output_prev_color_pre_alpha[iPxPos] = color;
+}
+#endif
+
+#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR)
+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) // Writes color to rw_output_prev_color_post_alpha at iPxPos.
+{
+    rw_output_prev_color_post_alpha[iPxPos] = color;
+}
+#endif
+
+FfxFloat32x2 SPD_LoadExposureBuffer() // reads texel (0, 0) of rw_auto_exposure; returns (0, 0) when that UAV is not bound
+{
+#if !defined(FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE)
+    return FfxFloat32x2(0.f, 0.f);
+#else
+    return rw_auto_exposure[FfxInt32x2(0, 0)];
+#endif // FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE
+}
+
+void SPD_SetExposureBuffer(FfxFloat32x2 value) // Writes value to texel (0, 0) of rw_auto_exposure; compiles to a no-op when that UAV is not bound.
+{
+#if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE
+    rw_auto_exposure[FfxInt32x2(0, 0)] = value;
+#endif // #if defined FSR3UPSCALER_BIND_UAV_AUTO_EXPOSURE
+}
+
+FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) // rw_img_mip_5 texel in x with yzw = 0; all zeros when that UAV is not bound
+{
+#if !defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5)
+    return FfxFloat32x4(0.f, 0.f, 0.f, 0.f);
+#else
+    return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0);
+#endif // FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5
+}
+
+void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) // Routes one downsampled value to the UAV matching its mip slice.
+{
+    switch (slice)
+    {
+    case FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL: // slice used for shading-change detection
+#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        rw_img_mip_shading_change[iPxPos] = value;
+#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        break;
+    case 5: // slice 5 is the one SPD_LoadMipmap5 reads back
+#if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5
+        rw_img_mip_5[iPxPos] = value;
+#endif // #if defined FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5
+        break;
+    default:
+        // Every other slice is discarded: no UAV receives `value`.
+        // avoid flattened side effect - NOTE(review): the self-assignment below is a value-preserving write, presumably kept so a flattened switch cannot clobber a texel; confirm before removing.
+#if defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+        rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos];
+#elif defined(FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5)
+        rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos];
+#endif // FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE / FSR3UPSCALER_BIND_UAV_EXPOSURE_MIP_5
+        break;
+    }
+}
+
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) // Atomically adds 1 to texel (0, 0) of rw_spd_global_atomic.
+{
+#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+    InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); // spdCounter receives the value from before the add
+#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC (when unbound, spdCounter is left unchanged)
+}
+
+void SPD_ResetAtomicCounter() // Resets texel (0, 0) of rw_spd_global_atomic to 0 with a plain store; no-op when the UAV is not bound.
+{
+#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+    rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0;
+#endif // #if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC
+}
+
+#endif // #if defined(FFX_GPU)
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta
new file mode 100644
index 00000000..e060d2c0
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: ba849fdeb042e7f458c81408414db834
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h
new file mode 100644
index 00000000..1f78a291
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h
@@ -0,0 +1,566 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#if !defined(FFX_FSR3UPSCALER_COMMON_H)
+#define FFX_FSR3UPSCALER_COMMON_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+//Locks
+#define LOCK_LIFETIME_REMAINING 0
+#define LOCK_TEMPORAL_LUMA 1
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; // approximately the smallest positive normal half-precision value
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f; // largest finite half-precision value
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = 1e-03f; // small threshold, e.g. the weight check in RectificationBoxComputeVarianceBoxData
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = 1.0f / FSR3UPSCALER_FP16_MAX; // reciprocal of the FP16 maximum
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MAX = 3.402823466e+38f; // largest finite 32-bit float
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FLT_MIN = 1.175494351e-38f; // smallest positive normal 32-bit float
+
+// treat vector truncation warnings as errors
+#pragma warning(error: 3206)
+
+// suppress warnings
+#pragma warning(disable: 3205)  // conversion from larger type to smaller
+#pragma warning(disable: 3571)  // in ffxPow(f, e), f could be negative
+
+// Reconstructed depth usage
+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
+
+// Accumulation
+FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
+FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
+FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
+FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
+
+// Auto exposure
+FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
+
+struct AccumulationPassCommonParams
+{
+    FfxInt32x2 iPxHrPos;
+    FfxFloat32x2 fHrUv;
+    FfxFloat32x2 fLrUv_HwSampler;
+    FfxFloat32x2 fMotionVector;
+    FfxFloat32x2 fReprojectedHrUv;
+    FfxFloat32 fHrVelocity;
+    FfxFloat32 fDepthClipFactor;
+    FfxFloat32 fDilatedReactiveFactor;
+    FfxFloat32 fAccumulationMask;
+
+    FfxBoolean bIsResetFrame;
+    FfxBoolean bIsExistingSample;
+    FfxBoolean bIsNewSample;
+};
+
+struct LockState
+{
+    FfxBoolean NewLock; //Set for both unique new and re-locked new
+    FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
+};
+
+void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus) // Sets both lock-status channels of the output to 0.
+{
+    fLockStatus = FfxFloat32x2(0, 0);
+}
+
+#if FFX_HALF
+void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus) // FP16 overload: sets both lock-status channels of the output to 0.
+{
+    fLockStatus = FFX_MIN16_F2(0, 0);
+}
+#endif
+
+
+void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus) // Zeroes the remaining-lifetime channel of a lock.
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = 0; // only this channel is touched; LOCK_TEMPORAL_LUMA keeps its value
+}
+
+#if FFX_HALF
+void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus) // FP16 overload: zeroes the remaining-lifetime channel of a lock.
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0); // only this channel is touched; LOCK_TEMPORAL_LUMA keeps its value
+}
+#endif
+
+struct RectificationBox // Running color statistics built up by the RectificationBox* helpers below.
+{
+    FfxFloat32x3 boxCenter; // weighted sum of samples; becomes the weighted mean after RectificationBoxComputeVarianceBoxData
+    FfxFloat32x3 boxVec; // weighted sum of squared samples; becomes the standard deviation after RectificationBoxComputeVarianceBoxData
+    FfxFloat32x3 aabbMin; // component-wise minimum of all added samples
+    FfxFloat32x3 aabbMax; // component-wise maximum of all added samples
+    FfxFloat32 fBoxCenterWeight; // sum of the sample weights
+};
+#if FFX_HALF
+struct RectificationBoxMin16 // FP16 counterpart of RectificationBox, used by the FFX_HALF overloads.
+{
+    FFX_MIN16_F3 boxCenter; // weighted sum of samples; becomes the weighted mean after RectificationBoxComputeVarianceBoxData
+    FFX_MIN16_F3 boxVec; // weighted sum of squared samples; becomes the standard deviation after RectificationBoxComputeVarianceBoxData
+    FFX_MIN16_F3 aabbMin; // component-wise minimum of all added samples
+    FFX_MIN16_F3 aabbMax; // component-wise maximum of all added samples
+    FFX_MIN16_F fBoxCenterWeight; // sum of the sample weights
+};
+#endif
+
+// Empties the box: zero sums and weight, and an inverted AABB (min = +FLT_MAX, max = -FLT_MAX) so any sample widens it.
+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    rectificationBox.aabbMin = FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX);
+    rectificationBox.aabbMax = -FfxFloat32x3(FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX, FSR3UPSCALER_FLT_MAX);
+    rectificationBox.boxVec = FfxFloat32x3(0, 0, 0);
+    rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0);
+    rectificationBox.fBoxCenterWeight = FfxFloat32(0);
+}
+#if FFX_HALF
+// FP16 overload. Empties the box: zero sums and weight, and an inverted AABB (min = +FP16_MAX, max = -FP16_MAX).
+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    rectificationBox.aabbMin = FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX);
+    rectificationBox.aabbMax = -FFX_MIN16_F3(FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX, FSR3UPSCALER_FP16_MAX);
+    rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0);
+    rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0);
+    rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
+}
+#endif
+
+// Seeds the box from its first sample: the AABB collapses onto colorSample and the weighted sums and weight are overwritten, not accumulated.
+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeighted;
+    rectificationBox.boxVec = colorSample * fWeighted;
+    rectificationBox.aabbMin = colorSample;
+    rectificationBox.aabbMax = colorSample;
+}
+
+// Folds one weighted color sample into the box; when bInitialSample is set the box is seeded from the sample instead.
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    if (!bInitialSample) {
+        const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+        rectificationBox.fBoxCenterWeight += fSampleWeight;
+        rectificationBox.boxCenter += fWeighted;
+        rectificationBox.boxVec += colorSample * fWeighted;
+        rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+        rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+    } else {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+    }
+}
+#if FFX_HALF
+// FP16 overload. Seeds the box from its first sample: the AABB collapses onto colorSample and the weighted sums and weight are overwritten.
+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
+{
+    const FFX_MIN16_F3 fWeighted = colorSample * fSampleWeight;
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeighted;
+    rectificationBox.boxVec = colorSample * fWeighted;
+    rectificationBox.aabbMin = colorSample;
+    rectificationBox.aabbMax = colorSample;
+}
+
+// FP16 overload. Folds one weighted color sample into the box; when bInitialSample is set the box is seeded from the sample instead.
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
+{
+    if (!bInitialSample) {
+        const FFX_MIN16_F3 fWeighted = colorSample * fSampleWeight;
+        rectificationBox.fBoxCenterWeight += fSampleWeight;
+        rectificationBox.boxCenter += fWeighted;
+        rectificationBox.boxVec += colorSample * fWeighted;
+        rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+        rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+    } else {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+    }
+}
+#endif
+
+// Divides the sums by the total weight (1 if |weight| <= FSR3UPSCALER_EPSILON): boxCenter = weighted mean, boxVec = sqrt(|E[x^2] - mean^2|).
+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    const FfxBoolean bUsableWeight = abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_EPSILON);
+    rectificationBox.fBoxCenterWeight = bUsableWeight ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f);
+    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
+    rectificationBox.boxVec = sqrt(abs(rectificationBox.boxVec / rectificationBox.fBoxCenterWeight - rectificationBox.boxCenter * rectificationBox.boxCenter));
+}
+#if FFX_HALF
+// FP16 overload. Divides the sums by the total weight (1 if |weight| <= FSR3UPSCALER_EPSILON): boxCenter = mean, boxVec = sqrt(|E[x^2] - mean^2|).
+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    const FfxBoolean bUsableWeight = abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR3UPSCALER_EPSILON);
+    rectificationBox.fBoxCenterWeight = bUsableWeight ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f);
+    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
+    rectificationBox.boxVec = sqrt(abs(rectificationBox.boxVec / rectificationBox.fBoxCenterWeight - rectificationBox.boxCenter * rectificationBox.boxCenter));
+}
+#endif
+
+FfxFloat32x3 SafeRcp3(FfxFloat32x3 v) // Component-wise reciprocal that never divides by zero.
+{
+    return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); // all-or-nothing: one zero component makes the whole result (0, 0, 0)
+}
+#if FFX_HALF
+FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v) // FP16 overload: component-wise reciprocal that never divides by zero.
+{
+    return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0); // all-or-nothing: one zero component makes the whole result (0, 0, 0)
+}
+#endif
+
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1) // ratio smaller / larger of the two inputs; 0 when the larger one is 0
+{
+    const FfxFloat32 fLarger = ffxMax(v0, v1);
+    return (fLarger == 0) ? 0 : ffxMin(v0, v1) / fLarger;
+}
+
+#if FFX_HALF
+FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
+{
+    const FFX_MIN16_F m = ffxMax(v0, v1);
+    return m != FFX_MIN16_F(0) ? ffxMin(v0, v1) / m : FFX_MIN16_F(0);
+}
+#endif
+
+FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
+{
+    // Inverse of RGBToYCoCg: .x carries Y, .y carries Co, .z carries Cg.
+    const FfxFloat32 fY  = fYCoCg.x;
+    const FfxFloat32 fCo = fYCoCg.y;
+    const FfxFloat32 fCg = fYCoCg.z;
+
+    return FfxFloat32x3(fY + fCo - fCg,
+                        fY + fCg,
+                        fY - fCo - fCg);
+}
+#if FFX_HALF
+FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
+{
+    // Half-precision overload: .x carries Y, .y carries Co, .z carries Cg.
+    const FFX_MIN16_F fY  = fYCoCg.x;
+    const FFX_MIN16_F fCo = fYCoCg.y;
+    const FFX_MIN16_F fCg = fYCoCg.z;
+
+    return FFX_MIN16_F3(fY + fCo - fCg,
+                        fY + fCg,
+                        fY - fCo - fCg);
+}
+#endif
+
+FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
+{
+    // Y = R/4 + G/2 + B/4, Co = R/2 - B/2, Cg = -R/4 + G/2 - B/4.
+    const FfxFloat32 fR = fRgb.r;
+    const FfxFloat32 fG = fRgb.g;
+    const FfxFloat32 fB = fRgb.b;
+
+    return FfxFloat32x3(0.25f * fR + 0.5f * fG + 0.25f * fB,
+                        0.5f * fR - 0.5f * fB,
+                        -0.25f * fR + 0.5f * fG - 0.25f * fB);
+}
+#if FFX_HALF
+FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
+{
+    // Half-precision overload: Y = R/4 + G/2 + B/4, Co = R/2 - B/2, Cg = -R/4 + G/2 - B/4.
+    const FFX_MIN16_F fR = fRgb.r;
+    const FFX_MIN16_F fG = fRgb.g;
+    const FFX_MIN16_F fB = fRgb.b;
+
+    return FFX_MIN16_F3(0.25 * fR + 0.5 * fG + 0.25 * fB,
+                        0.5 * fR - 0.5 * fB,
+                        -0.25 * fR + 0.5 * fG - 0.25 * fB);
+}
+#endif
+
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{   // Luma as a weighted sum of the channels; the weights are the Rec. 709 luminance coefficients.
+    return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f));
+}
+#if FFX_HALF
+FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
+{   // Half-precision overload; same weights as the FfxFloat32x3 version.
+    return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f));
+}
+#endif
+
+FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
+{
+    // Piecewise lightness curve applied to RGBToLuma(): a linear segment up to the
+    // 216/24389 knee and a cube-root segment above it; the result is scaled by 0.01.
+    const FfxFloat32 fLuma = RGBToLuma(fLinearRgb);
+    const FfxFloat32 fLinearLimit = 216.0f / 24389.0f;
+
+    if (fLuma <= fLinearLimit) {
+        return (fLuma * (24389.0f / 27.0f)) * 0.01f;
+    }
+
+    // Cube-root segment.
+    return (ffxPow(fLuma, 1.0f / 3.0f) * 116.0f - 16.0f) * 0.01f;
+}
+#if FFX_HALF
+FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
+{
+    // Half-precision overload of the piecewise lightness curve: linear up to the
+    // 216/24389 knee, cube root above it; the result is scaled by 0.01.
+    const FFX_MIN16_F fLuma = RGBToLuma(fLinearRgb);
+    const FFX_MIN16_F fLinearLimit = FFX_MIN16_F(216.0f / 24389.0f);
+
+    if (fLuma <= fLinearLimit) {
+        return (fLuma * FFX_MIN16_F(24389.0f / 27.0f)) * FFX_MIN16_F(0.01f);
+    }
+
+    // Cube-root segment.
+    return (ffxPow(fLuma, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f)) * FFX_MIN16_F(0.01f);
+}
+#endif
+
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{   // Divides the colour by (1 + max(0, r, g, b)).
+    return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx;
+}
+
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{   // Divides the colour by (1 - max(r, g, b)), with the divisor floored at FSR3UPSCALER_TONEMAP_EPSILON; undoes Tonemap up to that floor.
+    return fRgb / ffxMax(FSR3UPSCALER_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
+}
+
+#if FFX_HALF
+FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
+{   // Half-precision overload: divides the colour by (1 + max(0, r, g, b)).
+    return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx;
+}
+
+FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
+{   // Half-precision overload: divides by (1 - max(r, g, b)), divisor floored at FSR3UPSCALER_TONEMAP_EPSILON.
+    return fRgb / ffxMax(FFX_MIN16_F(FSR3UPSCALER_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
+}
+#endif
+
+FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+    // Applies the offset, then clamps each axis only on the side the offset points towards.
+    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;
+    if (iPxOffset.x < 0) { iPxClamped.x = ffxMax(iPxClamped.x, 0); }
+    if (iPxOffset.x > 0) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - 1); }
+    if (iPxOffset.y < 0) { iPxClamped.y = ffxMax(iPxClamped.y, 0); }
+    if (iPxOffset.y > 0) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - 1); }
+    return iPxClamped;
+    // Alternative kept for reference: return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1));
+}
+#if FFX_HALF
+FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
+{
+    // Half-precision overload: offset first, then clamp each axis on the side the offset points towards.
+    FFX_MIN16_I2 iPxClamped = iPxSample + iPxOffset;
+    if (iPxOffset.x < 0) { iPxClamped.x = ffxMax(iPxClamped.x, FFX_MIN16_I(0)); }
+    if (iPxOffset.x > 0) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - FFX_MIN16_I(1)); }
+    if (iPxOffset.y < 0) { iPxClamped.y = ffxMax(iPxClamped.y, FFX_MIN16_I(0)); }
+    if (iPxOffset.y > 0) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - FFX_MIN16_I(1)); }
+    return iPxClamped;
+    // Alternative kept for reference: return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1));
+}
+#endif
+
+FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
+{
+    // Keeps the sample point at least half a texel inside the iTextureSize region, then
+    // expresses it as a UV relative to iResourceSize.
+    const FfxFloat32x2 fHalfTexel = FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fUpperBound = FfxFloat32x2(iTextureSize) - fHalfTexel;
+    return ffxMax(fHalfTexel, ffxMin(fUv * iTextureSize, fUpperBound)) / FfxFloat32x2(iResourceSize);
+}
+
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{   // True when both coordinates are below size after both are converted to unsigned; a negative coordinate becomes a very large unsigned value and fails the test.
+    return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size)));
+}
+#if FFX_HALF
+FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
+{   // Half-precision overload: same unsigned "pos < size" test on both axes.
+    return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size)));
+}
+#endif
+
+FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
+{
+    // Lavg is exponentiated first, then converted to an ISO-100 exposure value; the result is 1 / Lmax.
+    const FfxFloat32 fLinearAverage = exp(Lavg);
+
+    const FfxFloat32 S = 100.0f; //ISO arithmetic speed
+    const FfxFloat32 K = 12.5f;
+    const FfxFloat32 q = 0.65f;
+
+    const FfxFloat32 fExposureIso100 = log2((fLinearAverage * S) / K);
+    const FfxFloat32 fMaxLuminance = (78.0f / (q * S)) * ffxPow(2.0f, fExposureIso100);
+    return 1 / fMaxLuminance;
+}
+#if FFX_HALF
+FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
+{
+    // Half-precision overload: exponentiate Lavg, derive the ISO-100 exposure value, return 1 / Lmax.
+    const FFX_MIN16_F fLinearAverage = exp(Lavg);
+
+    const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed
+    const FFX_MIN16_F K = FFX_MIN16_F(12.5f);
+    const FFX_MIN16_F q = FFX_MIN16_F(0.65f);
+
+    const FFX_MIN16_F fExposureIso100 = log2((fLinearAverage * S) / K);
+    const FFX_MIN16_F fMaxLuminance = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), fExposureIso100);
+    return FFX_MIN16_F(1) / fMaxLuminance;
+}
+#endif
+
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    // Pixel centre minus the jitter, rescaled from RenderSize() to DisplaySize() and
+    // floored to the containing display-resolution pixel.
+    const FfxFloat32x2 fUnjitteredCentre = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
+    return FfxInt32x2(floor((fUnjitteredCentre / RenderSize()) * DisplaySize()));
+}
+#if FFX_HALF
+FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
+{
+    // Half-precision overload: pixel centre minus the jitter, rescaled from RenderSize()
+    // to DisplaySize() and floored.
+    const FFX_MIN16_F2 fUnjitteredCentre = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
+    return FFX_MIN16_I2(floor((fUnjitteredCentre / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize())));
+}
+#endif
+
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{   // Maps a pixel position in [0, iSize] to [-1, 1] on x and, with the sign flipped, to [1, -1] on y.
+    return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
+}
+
+FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
+{
+    // view depth = factors[1] / (device depth - factors[0]).
+    // Details of the factors are found in ffx_fsr3upscaler.cpp.
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    return fFactors.y / (fDeviceDepth - fFactors.x);
+}
+
+FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
+{   // View-space depth of fDeviceDepth multiplied by ViewSpaceToMetersFactor().
+    return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor();
+}
+
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    // Z is the view-space depth; X and Y are the NDC position scaled by the z/w
+    // components of the device-to-view factors and by Z.
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fViewZ = GetViewSpaceDepth(fDeviceDepth);
+    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
+
+    return FfxFloat32x3(fFactors.z * fNdc.x * fViewZ,
+                        fFactors.w * fNdc.y * fViewZ,
+                        fViewZ);
+}
+
+FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{   // View-space position from GetViewSpacePosition(), with all three components multiplied by ViewSpaceToMetersFactor().
+    return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor();
+}
+
+FfxFloat32 GetMaxDistanceInMeters()
+{   // View-space depth, in meters, of device depth 0 (inverted depth) or 1 (regular depth).
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+    return GetViewSpaceDepthInMeters(0.0f);
+#else
+    return GetViewSpaceDepthInMeters(1.0f);
+#endif
+}
+
+FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
+{
+    // Divide out the pre-exposure, multiply in the exposure, then clamp into
+    // [0, FSR3UPSCALER_FP16_MAX]. Division and multiplication are kept as separate steps.
+    const FfxFloat32x3 fWithoutPreExposure = fRgb / fPreExposure;
+    const FfxFloat32x3 fExposed = fWithoutPreExposure * fExposure;
+
+    return clamp(fExposed, 0.0f, FSR3UPSCALER_FP16_MAX);
+}
+
+FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
+{
+    // Reverses the scaling of PrepareRgb: divide out the exposure, multiply PreExposure() back in.
+    const FfxFloat32x3 fUnexposed = fRgb / fExposure;
+
+    return fUnexposed * PreExposure();
+}
+
+
+struct BilinearSamplingData
+{   // Footprint of one bilinear lookup, filled in by GetBilinearSamplingData().
+    FfxInt32x2 iOffsets[4];  // integer offsets of the four taps relative to iBasePos
+    FfxFloat32 fWeights[4];  // bilinear weight of each tap, same order as iOffsets
+    FfxInt32x2 iBasePos;     // floor of the sample position in texel space (after the half-texel shift)
+};
+
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    // Texel-space position shifted by half a texel: its floor is the base tap, its fraction drives the weights.
+    const FfxFloat32x2 fTexelPos = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fFrac = ffxFract(fTexelPos);
+    const FfxFloat32x2 fInvFrac = FfxFloat32x2(1 - fFrac.x, 1 - fFrac.y);
+
+    BilinearSamplingData samplingData;
+    samplingData.iBasePos = FfxInt32x2(floor(fTexelPos));
+
+    samplingData.iOffsets[0] = FfxInt32x2(0, 0);
+    samplingData.iOffsets[1] = FfxInt32x2(1, 0);
+    samplingData.iOffsets[2] = FfxInt32x2(0, 1);
+    samplingData.iOffsets[3] = FfxInt32x2(1, 1);
+    samplingData.fWeights[0] = fInvFrac.x * fInvFrac.y;
+    samplingData.fWeights[1] = fFrac.x * fInvFrac.y;
+    samplingData.fWeights[2] = fInvFrac.x * fFrac.y;
+    samplingData.fWeights[3] = fFrac.x * fFrac.y;
+    return samplingData;
+}
+
+struct PlaneData
+{   // Plane in normal/offset form, as produced by GetPlaneFromPoints().
+    FfxFloat32x3 fNormal;            // normalized plane normal
+    FfxFloat32 fDistanceFromOrigin;  // -dot(point on plane, fNormal)
+};
+
+PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
+{
+    // Plane through three points: the normal is the normalized cross product of the two
+    // edges that share fP0; the offset is minus the projection of fP0 onto that normal.
+    const FfxFloat32x3 fEdgeTo1 = fP0 - fP1;
+    const FfxFloat32x3 fEdgeTo2 = fP0 - fP2;
+    PlaneData result;
+    result.fNormal = normalize(cross(fEdgeTo1, fEdgeTo2));
+    result.fDistanceFromOrigin = -dot(fP0, result.fNormal);
+    return result;
+}
+
+FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
+{   // Absolute value of the plane equation at fPoint; this is the distance when plane.fNormal has unit length (as GetPlaneFromPoints produces).
+    return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin);
+}
+
+#endif // #if defined(FFX_GPU)
+
+#endif //!defined(FFX_FSR3UPSCALER_COMMON_H)
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta
new file mode 100644
index 00000000..2ebe2aa0
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_common.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 6a638bec681caac4fa8e2ca198726694
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h
new file mode 100644
index 00000000..d26cf234
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h
@@ -0,0 +1,176 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+FFX_GROUPSHARED FfxUInt32 spdCounter;  // group-shared value passed to SPD_IncreaseAtomicCounter and returned by SpdGetAtomicCounter
+
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{   // SPD callback; slice is ignored. NOTE(review): presumably SPD_IncreaseAtomicCounter writes the counter's previous value into spdCounter - confirm against its definition.
+    SPD_IncreaseAtomicCounter(spdCounter);
+}
+
+FfxUInt32 SpdGetAtomicCounter()
+{   // SPD callback: returns the group-shared spdCounter value.
+    return spdCounter;
+}
+
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{   // SPD callback; slice is ignored and the reset is forwarded to SPD_ResetAtomicCounter().
+    SPD_ResetAtomicCounter();
+}
+
+#ifndef SPD_PACKED_ONLY
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];  // group-shared 16x16 scratch, one array per channel; read/written by SpdLoadIntermediate / SpdStoreIntermediate
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];  // .y channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];  // .z channel
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];  // .w channel
+
+FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
+{
+    // Source fetch for the luminance pyramid: returns the log luma of the input colour at
+    // tex in .x (the other channels are 0). slice is not used.
+    const FfxFloat32x2 fJitteredUv = (tex + 0.5f + Jitter()) / RenderSize();
+    const FfxFloat32x2 fSampleUv = ClampUv(fJitteredUv, RenderSize(), InputColorResourceDimensions());
+    const FfxFloat32x3 fColor = SampleInputColor(fSampleUv) / PreExposure();
+
+    // The luma is floored at FSR3UPSCALER_EPSILON before the logarithm is taken.
+    const FfxFloat32 fLogLuma = log(ffxMax(FSR3UPSCALER_EPSILON, RGBToLuma(fColor)));
+
+    // Make sure out of screen pixels contribute no value to the end result
+    const FfxBoolean bInsideRenderArea = all(FFX_LESS_THAN(tex, RenderSize()));
+
+    return FfxFloat32x4(bInsideRenderArea ? fLogLuma : 0.0f, 0, 0, 0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
+{   // SPD callback: forwards to SPD_LoadMipmap5(tex); slice is ignored.
+    return SPD_LoadMipmap5(tex);
+}
+
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+    // Only two mips are written out: the one selected by LumaMipLevelToUse() and mip 5.
+    const FfxBoolean bIsStoredMip = (index == LumaMipLevelToUse()) || (index == 5);
+    if (bIsStoredMip)
+    {
+        SPD_SetMipmap(pix, index, outValue.r);
+    }
+
+    // accumulate on 1x1 level: texel (0, 0) of the last mip updates the exposure buffer
+    const FfxBoolean bIsFinalTexel = (index == MipCount() - 1) && all(FFX_EQUAL(pix, FfxInt32x2(0, 0)));
+    if (bIsFinalTexel)
+    {
+        const FfxFloat32 fPrevLavg = SPD_LoadExposureBuffer().y;
+        FfxFloat32 fLavg = outValue.r;
+
+        if (fPrevLavg < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values
+        {
+            const FfxFloat32 fRate = 1.0f;
+            fLavg = fPrevLavg + (fLavg - fPrevLavg) * (1 - exp(-DeltaTime() * fRate));
+        }
+        SPD_SetExposureBuffer(FfxFloat32x2(ComputeAutoExposureFromLavg(fLavg), fLavg));
+    }
+}
+
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{   // SPD callback: reassembles one float4 from the four per-channel group-shared arrays.
+    return FfxFloat32x4(
+        spdIntermediateR[x][y],
+        spdIntermediateG[x][y],
+        spdIntermediateB[x][y],
+        spdIntermediateA[x][y]);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{   // SPD callback: splits value across the four per-channel group-shared arrays.
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{   // SPD reduction callback: arithmetic mean of the four inputs.
+    return (v0 + v1 + v2 + v3) * 0.25f;
+}
+#endif
+
+// define fetch and store functions Packed
+#if FFX_HALF
+
+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];  // packed half-precision scratch: .xy of each intermediate value
+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];  // packed half-precision scratch: .zw of each intermediate value
+
+FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
+{   // Stub: always returns zero. NOTE(review): unlike SpdLoadSourceImage this reads no input - confirm this pass is never built with FFX_HALF.
+    return FfxFloat16x4(0, 0, 0, 0);
+}
+
+FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
+{   // Stub: always returns zero.
+    return FfxFloat16x4(0, 0, 0, 0);
+}
+
+void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
+{   // Stub: stores nothing, so neither the mips nor the exposure buffer are written on this path.
+}
+
+FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
+{   // SPD callback (half precision): reassembles one half4 from the two packed group-shared arrays.
+    return FfxFloat16x4(
+        spdIntermediateRG[x][y].x,
+        spdIntermediateRG[x][y].y,
+        spdIntermediateBA[x][y].x,
+        spdIntermediateBA[x][y].y);
+}
+
+void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
+{   // SPD callback (half precision): .xy goes to spdIntermediateRG, .zw to spdIntermediateBA.
+    spdIntermediateRG[x][y] = value.xy;
+    spdIntermediateBA[x][y] = value.zw;
+}
+
+FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
+{   // SPD reduction callback (half precision): arithmetic mean of the four inputs.
+    return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
+}
+#endif
+
+#include "spd/ffx_spd.h"
+
+void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
+{
+    // Entry point of the luminance pyramid pass: runs the SPD downsampler with the
+    // callbacks defined in this header. Both variants receive the same arguments; the
+    // work group's .z index is passed as the slice.
+    const FfxUInt32x2 uGroupXY = FfxUInt32x2(WorkGroupId.xy);
+    const FfxUInt32 uThreadIndex = FfxUInt32(LocalThreadIndex);
+    const FfxUInt32 uMipCount = FfxUInt32(MipCount());
+    const FfxUInt32 uGroupCount = FfxUInt32(NumWorkGroups());
+    const FfxUInt32 uSlice = FfxUInt32(WorkGroupId.z);
+    const FfxUInt32x2 uGroupOffset = FfxUInt32x2(WorkGroupOffset());
+
+#if FFX_HALF
+    // NOTE(review): the *H load/store callbacks in this header (SpdLoadSourceImageH, SpdLoadH,
+    // SpdStoreH) are empty stubs - confirm this pass is not compiled with FFX_HALF enabled.
+    SpdDownsampleH(uGroupXY, uThreadIndex, uMipCount, uGroupCount, uSlice, uGroupOffset);
+#else
+    SpdDownsample(uGroupXY, uThreadIndex, uMipCount, uGroupCount, uSlice, uGroupOffset);
+#endif
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta
new file mode 100644
index 00000000..9f1d2ab9
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_compute_luminance_pyramid.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 61bd10363d44ee2478461c9e9efbcb67
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h
new file mode 100644
index 00000000..53763c85
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h
@@ -0,0 +1,259 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_DEPTH_CLIP_H
+#define FFX_FSR3UPSCALER_DEPTH_CLIP_H
+
+FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f;  // NOTE(review): not referenced anywhere in this header - confirm whether it is still needed
+
+FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
+{
+    // Compares the current depth with the reconstructed previous depth at the four bilinear taps
+    // around fUvSample. Returns a value in [0, 1]; 0 when no tap exceeds the required depth separation.
+    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+
+    // Loop-invariant terms (they depend on the render resolution only), computed once.
+    const FfxFloat32 Ksep = 1.37e-05f;
+    const FfxFloat32 fViewportDiagonal = length(FfxFloat32x2(RenderSize()));
+    const FfxFloat32 fResolutionFactor = ffxSaturate(fViewportDiagonal / length(FfxFloat32x2(1920.0f, 1080.0f)));
+    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
+
+    FfxFloat32 fDepth = 0.0f;
+    FfxFloat32 fWeightSum = 0.0f;
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, RenderSize())) {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
+                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
+
+                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+
+                // Only taps whose previous view-space depth is smaller than the current one contribute.
+                if (fDepthDiff > 0.0f) {
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
+#else
+                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
+#endif
+
+                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
+
+                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fViewportDiagonal * fDepthThreshold;
+
+                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+
+    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
+}
+
+FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
+{   // Measures how far the 3x3 neighbourhood's motion vectors point away from the centre ("nucleus") vector; the result is in [0, 1].
+    FfxFloat32 minconvergence = 1.0f;
+
+    FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
+    FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
+    FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);
+    // The neighbourhood is only inspected when the nucleus motion, scaled by RenderSize(), exceeds this epsilon.
+    const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;
+
+    if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
+        for (FfxInt32 y = -1; y <= 1; ++y) {
+            for (FfxInt32 x = -1; x <= 1; ++x) {
+                // Neighbour position, clamped against iPxInputMotionVectorSize on the side the offset points to.
+                FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize);
+
+                FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp);
+                FfxFloat32 fVelocityUv = length(fMotionVector);
+                // Once the running maximum is updated, fVelocityUv equals fMaxVelocityUv: both vectors are scaled by the largest speed seen so far.
+                fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+                fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+                minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
+            }
+        }
+    }
+    // Divergence (1 - smallest scaled dot product), faded by the largest speed relative to 0.01.
+    return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
+}
+
+FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
+{
+    // Relative spread (1 - min/max) of the dilated depth, in meters, over the 3x3 neighbourhood
+    // of iPxPos; forced to 0 if any tap sits exactly at the maximum distance.
+    const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
+    FfxFloat32 fNearest = fMaxDistInMeters;
+    FfxFloat32 fFarthest = 0.0f;
+    FfxBoolean bTouchesMaxDistance = false;
+
+    for (FfxInt32 iRow = -1; iRow <= 1; iRow++) {
+        for (FfxInt32 iCol = -1; iCol <= 1; iCol++) {
+
+            const FfxInt32x2 iTapPos = iPxPos + FfxInt32x2(iCol, iRow);
+
+            // Off-screen taps are still loaded, but their depth is multiplied by 0.
+            const FfxFloat32 fTapFactor = IsOnScreen(iTapPos, RenderSize()) ? 1.0f : 0.0f;
+            const FfxFloat32 fTapDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iTapPos)) * fTapFactor;
+
+            bTouchesMaxDistance = bTouchesMaxDistance || (fMaxDistInMeters == fTapDepth);
+            fNearest = ffxMin(fNearest, fTapDepth);
+            fFarthest = ffxMax(fFarthest, fTapDepth);
+        }
+    }
+
+    return (1.0f - fNearest / fFarthest) * (bTouchesMaxDistance ? 0.0f : 1.0f);
+}
+
+FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
+{
+    // Compares this pixel's dilated motion vector with the previous frame's vector at the reprojected
+    // position. Returns 0 for motion of at most one display pixel; otherwise 1 - |prev| / |current|, faded in by (distance / 20)^3.
+    const FfxFloat32x2 fUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize();
+    const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
+    const FfxFloat32x2 fReprojectedUv = ClampUv(fUv + fMotionVector, RenderSize(), MaxRenderSize());
+    const FfxFloat32x2 fPrevMotionVector = SamplePreviousDilatedMotionVector(fReprojectedUv);
+    const FfxFloat32 fPxDistance = length(fMotionVector * DisplaySize());  // motion length scaled by DisplaySize()
+    const FfxFloat32 fFadeIn = ffxSaturate(ffxPow(fPxDistance / 20.0f, 3.0f));
+    return fPxDistance > 1.0f ? ffxLerp(0.0f, 1.0f - ffxSaturate(length(fPrevMotionVector) / length(fMotionVector)), fFadeIn) : 0.0f;
+}
+
+void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
+{
+    // Compensate for bilinear sampling in accumulation pass: dilate the reactive and
+    // transparency-and-composition masks over the 3x3 neighbourhood and store the result.
+    FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
+    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);  // .x: reactive, .y: transparency/composition, seeded with the motion divergence
+
+    float fMasksSum = 0.0f;
+
+    FfxFloat32x3 fColorSamples[9];
+    FfxFloat32 fReactiveSamples[9];
+    FfxFloat32 fTransparencyAndCompositionSamples[9];
+    // First pass: gather colour and both masks for the 3x3 neighbourhood (positions clamped by ClampLoad).
+    FFX_UNROLL
+    for (FfxInt32 y = -1; y < 2; y++) {
+        FFX_UNROLL
+        for (FfxInt32 x = -1; x < 2; x++) {
+
+            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
+
+            FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+
+            FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
+            FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
+            FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);
+
+            fColorSamples[sampleIdx] = fColorSample;
+            fReactiveSamples[sampleIdx] = fReactiveSample;
+            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;
+
+            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
+        }
+    }
+    // Second pass is skipped when the summed mask values are not positive.
+    if (fMasksSum > 0)
+    {
+        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
+        {
+            FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
+            FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
+            FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];
+            // Similarity: dot product of the two colours divided by the larger of their squared lengths.
+            const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
+            const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;
+            // NOTE(review): fMaxLenSq is 0 when both colours are black, making this 0/0 - confirm the NaN is harmless on every target.
+            // Increase power for non-similar samples (the exponent is 1 at similarity 1 and 7 at similarity 0)
+            const FfxFloat32 fPowerBiasMax = 6.0f;
+            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
+            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
+            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);
+            // Keep the per-channel maximum over all taps.
+            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
+        }
+    }
+
+    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
+}
+
+FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
+{
+    // Input colour -> clamped to non-negative -> exposure-adjusted by PrepareRgb -> YCoCg.
+    // The input is assumed to be linear; non-linear input (sRGB, ...) should be converted
+    // to linear first and back to sRGB on output.
+
+    const FfxFloat32x3 fNonNegativeRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
+
+    const FfxFloat32x3 fExposedRgb = PrepareRgb(fNonNegativeRgb, Exposure(), PreExposure());
+
+    return RGBToYCoCg(fExposedRgb);
+}
+
+FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
+{   // Returns 0 when the previous view-space depth drops by more than 1% at both vertical steps (y-1 to y, y to y+1), else 1. fMotionVector is not used.
+    const FfxFloat32 fDepthYMinus1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
+    const FfxFloat32 fDepthY       = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
+    const FfxFloat32 fDepthYPlus1  = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));
+    const FfxBoolean bDropsAtBothSteps = ((fDepthYMinus1 - fDepthY) > (fDepthY * 0.01f)) && ((fDepthY - fDepthYPlus1) > (fDepthYPlus1 * 0.01f));
+    return bDropsAtBothSteps ? 0.0f : 1.0f;
+}
+
+void DepthClip(FfxInt32x2 iPxPos)
+{
+    // Per-pixel entry point: stores the prepared YCoCg colour with the depth-clip factor in .w, then the dilated reactive masks.
+    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
+    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
+
+    // Discard tiny mvs
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);
+
+    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
+    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
+
+    // Compute prepared input color and depth clip
+    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
+    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
+    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));
+
+    // Compute dilated reactive mask. The clamp size must match the resolution iSamplePos is expressed in.
+#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    const FfxInt32x2 iSamplePos = iPxPos;
+    const FfxInt32x2 iMotionVectorSize = FfxInt32x2(RenderSize());
+#else
+    const FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
+    const FfxInt32x2 iMotionVectorSize = FfxInt32x2(DisplaySize());
+#endif
+    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, iMotionVectorSize);
+    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));
+    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
+}
+
+#endif //!defined( FFX_FSR3UPSCALER_DEPTH_CLIP_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta
new file mode 100644
index 00000000..21fe6270
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_depth_clip.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 7c662249d70c4434da4f2da00e432c38
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h
new file mode 100644
index 00000000..e1a0d06e
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h
@@ -0,0 +1,116 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_LOCK_H
+#define FFX_FSR3UPSCALER_LOCK_H
+
+void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
+{
+    // Resets the reconstructed-depth entry at iPxHrPos (only when it lies inside RenderSize()) to the "far Z" bit pattern: 0x0 with inverted depth, else 0x3f800000 (1.0f in IEEE-754).
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+    const FfxUInt32 uClearDepthBits = 0x0;
+#else
+    const FfxUInt32 uClearDepthBits = 0x3f800000;
+#endif
+    if (all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())))) {
+        SetReconstructedDepth(iPxHrPos, uClearDepthBits);
+    }
+}
+
+FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
+{
+    // Returns true when the luma at pos is a ridge (strictly above or below every dissimilar 3x3 neighbour) and no 2x2 quad containing the centre is entirely similar to it.
+    const FfxInt32 RADIUS = 1;
+    FfxFloat32 fNucleus = LoadLockInputLuma(pos);
+    // A neighbour counts as "similar" while its max/min luma ratio against the centre stays below this threshold.
+    FfxFloat32 similar_threshold = 1.05f;
+    FfxFloat32 dissimilarLumaMin = FSR3UPSCALER_FLT_MAX;
+    FfxFloat32 dissimilarLumaMax = 0;
+    // Bit index of each 3x3 neighbourhood sample inside `mask` (row-major, 4 = centre):
+    /*
+     0 1 2
+     3 4 5
+     6 7 8
+    */
+
+    #define SETBIT(x) (1U << (x))
+
+    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar
+
+    const FfxUInt32 uNumRejectionMasks = 4;
+    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
+        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
+        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
+        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
+        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
+    };
+
+    FfxInt32 idx = 0;
+    FFX_UNROLL
+    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
+        FFX_UNROLL
+        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
+            if (x == 0 && y == 0) continue;
+
+            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
+
+            FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
+            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);
+            // NOTE(review): `difference > 0` presumably filters the NaN produced by 0/0 when both lumas are zero - confirm.
+            if (difference > 0 && (difference < similar_threshold)) {
+                mask |= SETBIT(idx);
+            } else {
+                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
+                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
+            }
+        }
+    }
+    // A ridge is strictly brighter or strictly darker than every dissimilar neighbour.
+    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;
+
+    if (FFX_FALSE == isRidge) {
+
+        return false;
+    }
+    // Reject when all four samples of any 2x2 quad containing the centre were flagged similar.
+    FFX_UNROLL
+    for (FfxUInt32 i = 0u; i < uNumRejectionMasks; i++) {
+
+        if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void ComputeLock(FfxInt32x2 iPxLrPos)
+{
+    // Stores a new lock of intensity 1.0 at the high-resolution position derived from iPxLrPos when the pixel passes the thin-feature test.
+    const FfxBoolean bThinFeature = ComputeThinFeatureConfidence(iPxLrPos);
+    if (bThinFeature) {
+        StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
+    }
+    // ClearResourcesForNextFrame(iPxLrPos);
+}
+
+#endif // FFX_FSR3UPSCALER_LOCK_H
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta
new file mode 100644
index 00000000..38a5a57a
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_lock.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: c7e9f53dd040b2645af5ccd936a94b0e
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h
new file mode 100644
index 00000000..37091135
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h
@@ -0,0 +1,107 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H
+#define FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H
+
+FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample) // Loads luma at iPxSample from mip LumaMipLevelToUse() and widens it to four components (luma in .x, zeros elsewhere).
+{
+    return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+// FFX_MIN16 overload of the same wrapper; compiled only when FFX_HALF is set.
+#if FFX_HALF
+FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
+{
+    return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+#endif
+
+#if FFX_FSR3UPSCALER_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+#else // full-precision path: bicubic fetch variant
+DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+#endif // NOTE(review): the half path declares a Bilinear fetch while the other declares Bicubic, yet both feed the Lanczos2 sampler below - confirm intended.
+DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples) // Declares ShadingChangeLumaSample; the only use visible in this header is inside an `#if 0` branch.
+
+FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
+{
+    // Returns (Exposure() * exp(mip luma sampled at fUvCoord)) raised to the power 1/6. iPxHrPos is not read.
+
+#if 0
+    // Disabled alternative: filter the mip through ShadingChangeLumaSample instead.
+    const FfxFloat32 fExposedLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
+#else
+    // Extent of the rendered region inside the selected mip: RenderSize() divided by 2^(level + 1).
+    const FfxFloat32 fMipScale = FfxFloat32(2u << LumaMipLevelToUse());
+    const FfxInt32x2 iMipRegionSize = FfxInt32x2(RenderSize() / fMipScale);
+    // Clamp the lookup against that region before sampling the luma mip.
+    const FfxFloat32x2 fClampedUv = ClampUv(fUvCoord, iMipRegionSize, LumaMipDimensions());
+    const FfxFloat32 fMipLuma = FfxFloat32(SampleMipLuma(fClampedUv, LumaMipLevelToUse()));
+    const FfxFloat32 fExposedLuma = Exposure() * exp(fMipLuma);
+#endif
+
+    return ffxPow(fExposedLuma, 1.0f / 6.0f);
+}
+
+void UpdateLockStatus(AccumulationPassCommonParams params,
+    FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state, // fReactiveFactor is raised in place; only state.NewLock is read
+    FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus, // indexed by LOCK_TEMPORAL_LUMA / LOCK_LIFETIME_REMAINING, updated in place
+    FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame, // out: saturated lock contribution for this frame
+    FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) { // out: 1 - MinDividedByMax(previous luma, current luma)
+    // Advances the per-pixel lock state for this frame: refreshes the tracked luma, shortens or kills the lock lifetime, and derives this frame's lock contribution.
+    const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
+
+    //init temporal shading change factor, init to -1 or so in reproject to know if "true new"?
+    fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA];
+    // Luma tracked before this frame's update (equals the current luma when the stored value was exactly 0).
+    FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
+
+    fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);
+    // Three cases: a lock requested this frame, a lock whose lifetime is <= 1, or a longer-lived lock.
+    if (state.NewLock) {
+        fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
+        // Lifetime becomes 2 when some lifetime was already stored, otherwise 1.
+        fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
+    }
+    else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
+        fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f);
+    }
+    else {
+        if (fLuminanceDiff > 0.1f) {
+            KillLock(fLockStatus);
+        }
+    }
+    // A luminance difference above 0.1 raises the reactive factor (reaching 1 at a difference of 0.2), which scales the lifetime down.
+    fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
+    // The accumulation mask attenuates the lifetime; a depth clip factor of 0.1 or more zeroes it.
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
+
+    // Compute this frame lock contribution
+    const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
+    const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
+    // Lifetimes of 1 or below contribute nothing; the lifetime term saturates once the lifetime reaches 1.25.
+    fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution);
+}
+
+#endif //!defined( FFX_FSR3UPSCALER_POSTPROCESS_LOCK_STATUS_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta
new file mode 100644
index 00000000..f8b56161
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_postprocess_lock_status.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 67a8b72ceb93d634f883b086fdccb348
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h
new file mode 100644
index 00000000..77619a51
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h
@@ -0,0 +1,67 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#define GROUP_SIZE  8
+#define FSR_RCAS_DENOISE 1
+
+#include "ffx_core.h"
+
+void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) { // Stores fUpscaledColor at iPxHrPos after converting the coordinate to FFX_MIN16_I2.
+    const FFX_MIN16_I2 iPxStorePos = FFX_MIN16_I2(iPxHrPos);
+    StoreUpscaledOutput(iPxStorePos, fUpscaledColor);
+}
+
+#define FSR_RCAS_F 1
+FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
+{
+    // Fetches the RCAS input texel at p and runs its rgb through PrepareRgb; the fourth component is passed through unchanged.
+    const FfxFloat32x4 fTexel = LoadRCAS_Input(p);
+    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fTexel.rgb, Exposure(), PreExposure());
+
+    return FfxFloat32x4(fPreparedRgb, fTexel.a);
+}
+void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} // Empty body: r, g and b are left unchanged.
+
+#include "fsr1/ffx_fsr1.h"
+
+void CurrFilter(FFX_MIN16_U2 pos)
+{
+    // Runs FsrRcasF for the pixel at pos, passes the result through UnprepareRgb and writes it to the upscaled output.
+    FfxFloat32 fSharpR;
+    FfxFloat32 fSharpG;
+    FfxFloat32 fSharpB;
+    FsrRcasF(fSharpR, fSharpG, fSharpB, pos, RCASConfig());
+    WriteUpscaledOutput(pos, UnprepareRgb(FfxFloat32x3(fSharpR, fSharpG, fSharpB), Exposure()));
+}
+
+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
+{
+    // Each thread filters four pixels spaced 8 apart, starting from its remapped position inside the workgroup's 16-pixel-aligned tile. Dtid is not read.
+    // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
+    const FfxUInt32x2 uTileOrigin = FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
+    const FfxUInt32x2 uBase = ffxRemapForQuad(LocalThreadId.x) + uTileOrigin;
+    // Visit order: (0,0), (8,0), (8,8), (0,8) relative to uBase.
+    CurrFilter(FFX_MIN16_U2(uBase));
+    CurrFilter(FFX_MIN16_U2(uBase + FfxUInt32x2(8u, 0u)));
+    CurrFilter(FFX_MIN16_U2(uBase + FfxUInt32x2(8u, 8u)));
+    CurrFilter(FFX_MIN16_U2(uBase + FfxUInt32x2(0u, 8u)));
+}
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta
new file mode 100644
index 00000000..7a53a1f4
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 692efb7cec0df67408a583a7ff34146a
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h
new file mode 100644
index 00000000..a822dfc5
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,146 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+#define FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
+{
+    // Scatters fDepth into the reconstructed previous-frame depth at the location iPxPos reprojects to via fMotionVector.
+    // Motion of 0.1 display pixels or less is zeroed out.
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
+
+    FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize;
+    FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+    // Resolve taps against iPxDepthSize, the same extent used for fUv above and for the on-screen test below.
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, iPxDepthSize);
+
+    // Project current depth into previous frame locations.
+    // Push to all pixels having some contribution if reprojection is using bilinear logic.
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+        // Skip taps whose bilinear weight does not exceed fReconstructedDepthBilinearWeightThreshold.
+        if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+            FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
+            if (IsOnScreen(iStorePos, iPxDepthSize)) {
+                StoreReconstructedDepth(iStorePos, fDepth);
+            }
+        }
+    }
+}
+
+void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
+{
+    // Searches the 3x3 neighbourhood of iPxPos and outputs the nearest depth value together with the coordinate it was found at.
+    const FfxInt32 iSampleCount = 9;
+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+        FfxInt32x2(+0, +0),
+        FfxInt32x2(+1, +0),
+        FfxInt32x2(+0, +1),
+        FfxInt32x2(+0, -1),
+        FfxInt32x2(-1, +0),
+        FfxInt32x2(-1, +1),
+        FfxInt32x2(+1, +1),
+        FfxInt32x2(-1, -1),
+        FfxInt32x2(+1, -1),
+    };
+
+    // pull out the depth loads to allow SC to batch them
+    FfxFloat32 depth[iSampleCount];
+    FfxInt32 iSampleIndex = 0;
+    FFX_UNROLL
+    for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        depth[iSampleIndex] = LoadInputDepth(iPos);
+    }
+
+    // find closest depth
+    fNearestDepthCoord = iPxPos;
+    fNearestDepth = depth[0];
+    FFX_UNROLL
+    for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
+        // The centre sample (index 0) is the starting candidate; off-screen neighbours are ignored.
+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        if (IsOnScreen(iPos, iPxSize)) {
+            // "Nearest" is the larger value with inverted depth and the smaller value otherwise.
+            FfxFloat32 fNdDepth = depth[iSampleIndex];
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+            if (fNdDepth > fNearestDepth) {
+#else
+            if (fNdDepth < fNearestDepth) {
+#endif
+                fNearestDepthCoord = iPos;
+                fNearestDepth = fNdDepth;
+            }
+        }
+    }
+}
+
+FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
+{
+    // Produces the luma value the lock logic stores for the input colour at iPxLrPos.
+    // The input is assumed to be linear; non-linear sources (sRGB, ...) would have to be
+    // converted to linear first and back to sRGB on output. Negative channels clamp to zero.
+    const FfxFloat32x3 fInputRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
+
+    // Use the internal auto exposure for locking: divide out PreExposure(), then apply Exposure().
+    FfxFloat32x3 fExposedRgb = fInputRgb / PreExposure();
+    fExposedRgb = fExposedRgb * Exposure();
+
+#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT
+    fExposedRgb = Tonemap(fExposedRgb);
+#endif
+
+    // The 1/6 power is applied here for the locking logic; if this luma is used elsewhere
+    // the ffxPow has to be moved.
+    return ffxPow(RGBToPerceivedLuma(fExposedRgb), FfxFloat32(1.0 / 6.0));
+}
+
+void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
+{
+    // For pixel iPxLrPos: stores the dilated depth and motion vector, reprojects the depth into the previous frame and stores the lock input luma.
+    FfxFloat32 fDilatedDepth;
+    FfxInt32x2 iNearestDepthCoord;
+
+    FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
+
+    // The motion vector is read at the nearest-depth neighbour, mapped through ComputeHrPosFromLrPos unless motion vectors are low-resolution.
+#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
+#else
+    FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
+#endif
+
+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
+
+    StoreDilatedDepth(iPxLrPos, fDilatedDepth);
+    StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);
+
+    ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
+
+    FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
+    StoreLockInputLuma(iPxLrPos, fLockInputLuma);
+}
+
+
+#endif //!defined( FFX_FSR3UPSCALER_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta
new file mode 100644
index 00000000..78ced0dd
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reconstruct_dilated_velocity_and_previous_depth.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: c8b3854bad30a8b40babc5a9805f294e
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h
new file mode 100644
index 00000000..29b75843
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h
@@ -0,0 +1,137 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_REPROJECT_H
+#define FFX_FSR3UPSCALER_REPROJECT_H
+
+#ifndef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
+#endif
+
+FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) // Thin wrapper over LoadHistory, passed to the fetch declarations below.
+{
+    return LoadHistory(iPxSample);
+}
+// FFX_MIN16 overload of the wrapper; compiled only when FFX_HALF is set.
+#if FFX_HALF
+FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
+{
+    return FFX_MIN16_F4(LoadHistory(iPxSample));
+}
+#endif
+
+// Declares FetchHistorySamples and HistorySample on top of WrapHistory, using the Lanczos sampler chosen by FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE (Declare* macros are defined elsewhere).
+#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
+#else
+DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
+#endif
+
+FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample) // Pads the value returned by LoadLockStatus with two zero components so it fits the four-component fetch helpers.
+{
+    FfxFloat32x4 fSample = FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
+    return fSample;
+}
+// FFX_MIN16 overload of the wrapper; compiled only when FFX_HALF is set.
+#if FFX_HALF
+FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
+{
+    FFX_MIN16_F4 fSample = FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
+
+    return fSample;
+}
+#endif
+// Declares FetchLockStatusSamples and LockStatusSample. The `#if 1` selects the Bilinear variant; the Lanczos variant after its `#else` is compiled out.
+#if 1
+#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#else
+DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#endif
+#else
+#if FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
+#else
+DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSample(LockStatusSample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
+#endif
+#endif
+
+FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
+{
+    // Returns the motion vector for an output pixel: with low-resolution motion vectors, the dilated
+    // vector at fHrUv scaled by RenderSize(); otherwise the input motion vector at iPxHrPos.
+#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    return LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize()));
+#else
+    return LoadInputMotionVector(iPxHrPos);
+#endif
+}
+
+FfxBoolean IsUvInside(FfxFloat32x2 fUv) {
+    // True when both components lie in the closed range [0, 1]: lower bounds are checked first, then upper bounds.
+    return (fUv.x >= 0.0f && fUv.y >= 0.0f) && (fUv.x <= 1.0f && fUv.y <= 1.0f);
+}
+
+void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample) {
+    // Offsets the pixel's UV by its motion vector and reports whether the result still passes IsUvInside.
+    const FfxFloat32x2 fMovedUv = params.fHrUv + params.fMotionVector;
+    bIsExistingSample = IsUvInside(fMovedUv);
+    fReprojectedHrUv = fMovedUv;
+}
+
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
+{
+    // Samples the history at the reprojected UV and outputs its colour in YCoCg plus the reactivity data carried in .w.
+    const FfxFloat32x4 fHistoryTexel = HistorySample(params.fReprojectedHrUv, DisplaySize());
+    const FfxFloat32 fSignedReactivity = fHistoryTexel.w;
+    // Colour: PrepareRgb with the previous frame's pre-exposure, then RGB -> YCoCg.
+    const FfxFloat32x3 fPreparedRgb = PrepareRgb(fHistoryTexel.rgb, Exposure(), PreviousFramePreExposure());
+    fHistoryColor = RGBToYCoCg(fPreparedRgb);
+    // Reactivity: the saturated magnitude is the temporal reactive factor; a negative sign sets bInMotionLastFrame.
+    fTemporalReactiveFactor = ffxSaturate(abs(fSignedReactivity));
+    bInMotionLastFrame = (fSignedReactivity < 0.0f);
+}
+
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
+{
+    // Builds the LockState for this pixel and outputs the lock status sampled at the reprojected UV.
+    LockState state = { FFX_FALSE, FFX_FALSE };
+    // A new lock is flagged when the value read at iPxHrPos exceeds 127/255.
+    const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
+    // Any non-zero remaining lifetime in the reprojected status marks the pixel as locked in the previous frame.
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+
+    return state;
+}
+
+#endif //!defined( FFX_FSR3UPSCALER_REPROJECT_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta
new file mode 100644
index 00000000..ea2e14d1
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_reproject.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 9d893016eebb2564f9a66b80afb0849f
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h
new file mode 100644
index 00000000..d98cfcc0
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h
@@ -0,0 +1,104 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_RESOURCES_H
+#define FFX_FSR3UPSCALER_RESOURCES_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL                                           0
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY                              1
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR                                    2
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS                           3
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH                                    4
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE                                 5
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK                            6
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK        7
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH           8
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS                         9
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH                                  10
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR                        11
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS                                    12
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS                                      13
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR                           14
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY                                   15
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT                                   16
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT                                    17
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT                               18
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT                                19
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT                                     20
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_1                                  21
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS_2                                  22
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1                      23
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2                      24
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY                    25
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION  26
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT                      27 // NOTE(review): "IDENTITIER" (sic) - spelling left as-is because the name may be referenced elsewhere; confirm before renaming
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS                         28
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE                                29 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0                       29 // first of 13 consecutive mip identifiers (29..41)
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1                       30
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2                       31
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3                       32
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4                       33
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5                       34
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6                       35
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7                       36
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8                       37
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9                       38
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10                      39
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11                      40
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12                      41 // last mip identifier
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE                      42
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTO_EXPOSURE                                  43
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE                                   44
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED                     45
+
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR                           46
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR                          47
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1                         48
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1                        49
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2                         50
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2                        51
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS                52
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1                                 53
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2                                 54
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA                                55
+
+// Shading change detection mip level setting, value must be in the range [FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE          FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 // i.e. identifier 33
+#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL                                           (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) // evaluates to 33 - 29 = 4
+
+#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT                                          56 // highest identifier above is 55; keep in sync when identifiers are added
+
+#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER                                     0 // constant buffer identifiers are the consecutive values 0..3
+#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD                                      1
+#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS                                     2
+#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE                              3
+
+#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP                                    1 // bit 0; values are distinct powers of two, presumably OR-ed into one flags word - confirm against the users of these flags
+#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP                             2 // bit 1
+#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD                                  4 // bit 2
+#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX                               8 // bit 3
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif //!defined( FFX_FSR3UPSCALER_RESOURCES_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta
new file mode 100644
index 00000000..24cdbd23
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_resources.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: b5a95a38dcfaf3946a5095bbbc42939a
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h
new file mode 100644
index 00000000..d33f70cf
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h
@@ -0,0 +1,606 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_SAMPLE_H
+#define FFX_FSR3UPSCALER_SAMPLE_H
+
+// HLSL builds only (FFX_HLSL defined): disable compiler warning 4008, presumably raised by the weight/sinc divisions in the helpers below.
+#ifdef FFX_HLSL
+#pragma warning(disable: 4008) // potentially divide by zero
+#endif //FFX_HLSL
+
+// 2x2 texel footprint consumed by Bilinear(). Member fColorXY holds the texel
+// in column X, row Y of the footprint; the generated fetch helpers load it
+// from iPxSample + (X, Y).
+struct FetchedBilinearSamples
+{
+    FfxFloat32x4 fColor00, fColor10;    // row Y = 0
+    FfxFloat32x4 fColor01, fColor11;    // row Y = 1
+};
+
+// 4x4 texel footprint consumed by the Lanczos2, Lanczos2LUT and Lanczos2Approx
+// reconstruction helpers in this file.
+//
+// Member fColorXY holds the texel in column X, row Y of the footprint. The
+// fetch helpers generated by DeclareCustomFetchBicubicSamplesWithType load it
+// from iPxSample + (X - 1, Y - 1) (edge-clamped), so fColor11 is the texel at
+// iPxSample itself.
+struct FetchedBicubicSamples
+{
+    // Row Y = 0
+    FfxFloat32x4 fColor00, fColor10,
+                 fColor20, fColor30;
+    // Row Y = 1
+    FfxFloat32x4 fColor01, fColor11,
+                 fColor21, fColor31;
+    // Row Y = 2
+    FfxFloat32x4 fColor02, fColor12,
+                 fColor22, fColor32;
+    // Row Y = 3
+    FfxFloat32x4 fColor03, fColor13,
+                 fColor23, fColor33;
+};
+
+#if FFX_HALF
+// Reduced-precision (FFX_MIN16_F4) counterpart of FetchedBilinearSamples,
+// only declared when FFX_HALF is enabled. Member fColorXY holds the texel in
+// column X, row Y of the 2x2 footprint.
+struct FetchedBilinearSamplesMin16
+{
+    FFX_MIN16_F4 fColor00, fColor10;    // row Y = 0
+    FFX_MIN16_F4 fColor01, fColor11;    // row Y = 1
+};
+
+// Reduced-precision (FFX_MIN16_F4) counterpart of FetchedBicubicSamples, only
+// declared when FFX_HALF is enabled.
+//
+// Member fColorXY holds the texel in column X, row Y of the 4x4 footprint.
+// The fetch helpers generated by DeclareCustomFetchBicubicSamplesWithType
+// load it from iPxSample + (X - 1, Y - 1) (edge-clamped), so fColor11 is the
+// texel at iPxSample itself.
+struct FetchedBicubicSamplesMin16
+{
+    // Row Y = 0
+    FFX_MIN16_F4 fColor00, fColor10,
+                 fColor20, fColor30;
+    // Row Y = 1
+    FFX_MIN16_F4 fColor01, fColor11,
+                 fColor21, fColor31;
+    // Row Y = 2
+    FFX_MIN16_F4 fColor02, fColor12,
+                 fColor22, fColor32;
+    // Row Y = 3
+    FFX_MIN16_F4 fColor03, fColor13,
+                 fColor23, fColor33;
+};
+#else //FFX_HALF
+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples // without FFX_HALF the Min16 name is just an alias of the full-precision struct
+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples // same aliasing for the 2x2 footprint
+#endif //FFX_HALF
+
+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) { // unclamped lerp: yields A at t = 0 and B at t = 1
+    FfxFloat32x4 fDelta = B - A;
+    return A + fDelta * t;
+}
+
+// Bilinear blend of a fetched 2x2 footprint: both rows are interpolated along
+// X with fPxFrac.x, then the two row results are interpolated along Y with fPxFrac.y.
+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac) {
+    FfxFloat32x4 fRowY0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FfxFloat32x4 fRowY1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    return Linear(fRowY0, fRowY1, fPxFrac.y);
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) { // reduced-precision unclamped lerp: A at t = 0, B at t = 1
+    FFX_MIN16_F4 fDelta = B - A;
+    return A + fDelta * t;
+}
+
+// Reduced-precision overload of Bilinear(): rows are blended along X with
+// fPxFrac.x, then the two row results are blended along Y with fPxFrac.y.
+FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) {
+    FFX_MIN16_F4 fRowY0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    FFX_MIN16_F4 fRowY1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    return Linear(fRowY0, fRowY1, fPxFrac.y);
+}
+#endif
+
+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) { // Lanczos (a = 2) kernel sinc(x) * sinc(x / 2); x is not range-limited here
+    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
+    FfxFloat32 fPiX = PI * x, fHalfPiX = 0.5f * PI * x; // arguments of the two sinc terms
+    return abs(x) < FSR3UPSCALER_EPSILON ? 1.f : (sin(fPiX) / fPiX) * (sin(fHalfPiX) / fHalfPiX); // returns 1 near x = 0 instead of evaluating 0 / 0
+}
+
+// Lanczos2 weight for a signed tap distance x: |x| is limited to 2 before the
+// kernel is evaluated, so farther taps get the kernel's value at distance 2.
+FfxFloat32 Lanczos2(FfxFloat32 x) {
+    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
+}
+
+#if FFX_HALF
+
+#if 0 // compiled out: within this file only the FfxFloat32 Lanczos2NoClamp exists, so the FFX_HALF Lanczos2 below calls that one and casts its result
+FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
+{
+    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
+    return abs(x) < FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
+}
+#endif
+
+// Reduced-precision Lanczos2 weight: |x| is limited to 2, Lanczos2NoClamp is
+// evaluated on that value and its result is cast back to FFX_MIN16_F.
+FFX_MIN16_F Lanczos2(FFX_MIN16_F x) {
+    return FFX_MIN16_F(Lanczos2NoClamp(ffxMin(abs(x), FFX_MIN16_F(2.0f))));
+}
+#endif //FFX_HALF
+
+// FSR1 Lanczos2 approximation, evaluated from the squared distance x2 = x * x.
+// No clamping is done here: callers must ensure x2 <= 4.
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) {
+    FfxFloat32 fTermA = (2.0f / 5.0f) * x2 - 1;
+    FfxFloat32 fTermB = (1.0f / 4.0f) * x2 - 1;
+    return ((25.0f / 16.0f) * fTermA * fTermA - (25.0f / 16.0f - 1)) * (fTermB * fTermB);
+}
+
+#if FFX_HALF
+// Reduced-precision FSR1 Lanczos2 approximation from the squared distance x2; x2 is not clamped and must be <= 4.
+FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) {
+    FFX_MIN16_F fTermA = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
+    FFX_MIN16_F fTermB = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1);
+    return (FFX_MIN16_F(25.0f / 16.0f) * fTermA * fTermA - FFX_MIN16_F(25.0f / 16.0f - 1)) * (fTermB * fTermB);
+}
+#endif //FFX_HALF
+
+// Clamped FSR1 Lanczos2 approximation: the squared distance x2 is limited to
+// 4 before Lanczos2ApproxSqNoClamp is evaluated.
+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) {
+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
+}
+
+#if FFX_HALF
+// Reduced-precision clamped FSR1 Lanczos2 approximation: x2 is limited to 4
+// before Lanczos2ApproxSqNoClamp is evaluated.
+FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) {
+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, FFX_MIN16_F(4.0f)));
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) { // FSR1 Lanczos2 approximation at distance x; the squared distance is not clamped
+    FfxFloat32 fSquaredDistance = x * x;
+    return Lanczos2ApproxSqNoClamp(fSquaredDistance);
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) { // reduced-precision FSR1 Lanczos2 approximation at distance x; no clamping
+    FFX_MIN16_F fSquaredDistance = x * x;
+    return Lanczos2ApproxSqNoClamp(fSquaredDistance);
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2Approx(FfxFloat32 x) { // FSR1 Lanczos2 approximation at distance x; Lanczos2ApproxSq clamps the squared distance to 4
+    FfxFloat32 fSquaredDistance = x * x;
+    return Lanczos2ApproxSq(fSquaredDistance);
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) { // reduced-precision variant; Lanczos2ApproxSq clamps the squared distance to 4
+    FFX_MIN16_F fSquaredDistance = x * x;
+    return Lanczos2ApproxSq(fSquaredDistance);
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) { // weight looked up through SampleLanczos2Weight using the unsigned distance |x|
+    FfxFloat32 fDistance = abs(x);
+    return SampleLanczos2Weight(fDistance);
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) { // reduced-precision variant: looks up |x| and casts the weight to FFX_MIN16_F
+    FFX_MIN16_F fDistance = abs(x);
+    return FFX_MIN16_F(SampleLanczos2Weight(fDistance));
+}
+#endif //FFX_HALF
+
+// Normalised 4-tap filter with LUT weights: taps sit at -1, 0, +1, +2 and t is the sample position relative to fColor1.
+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) {
+    FfxFloat32 fTap0 = Lanczos2_UseLUT(-1.f - t), fTap1 = Lanczos2_UseLUT(-0.f - t);
+    FfxFloat32 fTap2 = Lanczos2_UseLUT(+1.f - t), fTap3 = Lanczos2_UseLUT(+2.f - t);
+    FfxFloat32x4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FfxFloat32 fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+#if FFX_HALF
+// Reduced-precision normalised 4-tap filter with LUT weights: taps at -1, 0, +1, +2; t is the position relative to fColor1.
+FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) {
+    FFX_MIN16_F fTap0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t), fTap1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fTap2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t), fTap3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
+    FFX_MIN16_F4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FFX_MIN16_F fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+#endif
+
+// Normalised 4-tap Lanczos2 filter: taps sit at -1, 0, +1, +2 and t is the sample position relative to fColor1.
+FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) {
+    FfxFloat32 fTap0 = Lanczos2(-1.f - t), fTap1 = Lanczos2(-0.f - t);
+    FfxFloat32 fTap2 = Lanczos2(+1.f - t), fTap3 = Lanczos2(+2.f - t);
+    FfxFloat32x4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FfxFloat32 fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+
+// Lanczos2 reconstruction of a fetched 4x4 footprint.
+//
+// The kernel is applied separably: every footprint row is first resolved along
+// X with fPxFrac.x, then the four row results are resolved along Y with
+// fPxFrac.y. The filtered colour is finally clamped, per component, to the
+// min/max of the four central texels (fColor11, fColor21, fColor12, fColor22)
+// to suppress ringing.
+FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FfxFloat32x4 fBoundsMin = Samples.fColor11;
+    FfxFloat32x4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+
+#if FFX_HALF
+// Reduced-precision normalised 4-tap Lanczos2 filter: taps at -1, 0, +1, +2; t is the position relative to fColor1.
+FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) {
+    FFX_MIN16_F fTap0 = Lanczos2(FFX_MIN16_F(-1.f) - t), fTap1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fTap2 = Lanczos2(FFX_MIN16_F(+1.f) - t), fTap3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
+    FFX_MIN16_F4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FFX_MIN16_F fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+
+// Reduced-precision Lanczos2 reconstruction of a fetched 4x4 footprint.
+//
+// The kernel is applied separably: every footprint row is first resolved along
+// X with fPxFrac.x, then the four row results are resolved along Y with
+// fPxFrac.y. The filtered colour is finally clamped, per component, to the
+// min/max of the four central texels (fColor11, fColor21, fColor12, fColor22)
+// to suppress ringing.
+FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fRow0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fRow1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fRow2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fRow3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fFiltered = Lanczos2(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FFX_MIN16_F4 fBoundsMin = Samples.fColor11;
+    FFX_MIN16_F4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+#endif //FFX_HALF
+
+
+// Lanczos2 reconstruction of a fetched 4x4 footprint using LUT weights.
+//
+// Lanczos2_UseLUT is applied separably: every footprint row is first resolved
+// along X with fPxFrac.x, then the four row results are resolved along Y with
+// fPxFrac.y. The filtered colour is finally clamped, per component, to the
+// min/max of the four central texels (fColor11, fColor21, fColor12, fColor22)
+// to suppress ringing.
+FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FfxFloat32x4 fBoundsMin = Samples.fColor11;
+    FfxFloat32x4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+
+#if FFX_HALF
+// Reduced-precision Lanczos2 reconstruction of a 4x4 footprint using LUT weights.
+//
+// Lanczos2_UseLUT is applied separably: every footprint row is first resolved
+// along X with fPxFrac.x, then the four row results are resolved along Y with
+// fPxFrac.y. The filtered colour is finally clamped, per component, to the
+// min/max of the four central texels (fColor11, fColor21, fColor12, fColor22)
+// to suppress ringing.
+FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fRow0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fRow1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fRow2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fRow3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fFiltered = Lanczos2_UseLUT(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FFX_MIN16_F4 fBoundsMin = Samples.fColor11;
+    FFX_MIN16_F4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+#endif //FFX_HALF
+
+
+
+// Normalised 4-tap filter using the unclamped FSR1 approximation: taps at -1, 0, +1, +2; t is the position relative to fColor1.
+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) {
+    FfxFloat32 fTap0 = Lanczos2ApproxNoClamp(-1.f - t), fTap1 = Lanczos2ApproxNoClamp(-0.f - t);
+    FfxFloat32 fTap2 = Lanczos2ApproxNoClamp(+1.f - t), fTap3 = Lanczos2ApproxNoClamp(+2.f - t);
+    FfxFloat32x4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FfxFloat32 fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+
+#if FFX_HALF
+// Reduced-precision normalised 4-tap filter using the unclamped FSR1 approximation: taps at -1, 0, +1, +2; t is relative to fColor1.
+FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) {
+    FFX_MIN16_F fTap0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t), fTap1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fTap2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t), fTap3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
+    FFX_MIN16_F4 fWeightedSum = fTap0 * fColor0 + fTap1 * fColor1 + fTap2 * fColor2 + fTap3 * fColor3;
+    FFX_MIN16_F fTotalWeight = fTap0 + fTap1 + fTap2 + fTap3;
+    return fWeightedSum / fTotalWeight;
+}
+#endif //FFX_HALF
+
+// Approximate (FSR1-style) Lanczos2 reconstruction of a fetched 4x4 footprint.
+//
+// The 4-tap Lanczos2Approx is applied separably: every footprint row is first
+// resolved along X with fPxFrac.x, then the four row results are resolved
+// along Y with fPxFrac.y. The filtered colour is finally clamped, per
+// component, to the min/max of the four central texels (fColor11, fColor21,
+// fColor12, fColor22) to suppress ringing.
+FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FfxFloat32x4 fBoundsMin = Samples.fColor11;
+    FfxFloat32x4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+
+#if FFX_HALF
+// Reduced-precision approximate (FSR1-style) Lanczos2 reconstruction of a 4x4 footprint.
+//
+// The 4-tap Lanczos2Approx is applied separably: every footprint row is first
+// resolved along X with fPxFrac.x, then the four row results are resolved
+// along Y with fPxFrac.y. The filtered colour is finally clamped, per
+// component, to the min/max of the four central texels (fColor11, fColor21,
+// fColor12, fColor22) to suppress ringing.
+FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fRow0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fRow1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fRow2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fRow3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fFiltered = Lanczos2Approx(fRow0, fRow1, fRow2, fRow3, fPxFrac.y);
+
+    // Deringing bounds start from the first central texel.
+    // TODO: only use 4 by checking jitter
+    FFX_MIN16_F4 fBoundsMin = Samples.fColor11;
+    FFX_MIN16_F4 fBoundsMax = Samples.fColor11;
+
+    // Fold in the other three central texels (unrolled by hand).
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor21);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor21);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor12);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor12);
+
+    fBoundsMin = ffxMin(fBoundsMin, Samples.fColor22);
+    fBoundsMax = ffxMax(fBoundsMax, Samples.fColor22);
+
+    // Keep the result inside the range spanned by those texels.
+    return clamp(fFiltered, fBoundsMin, fBoundsMax);
+}
+#endif
+
+// Offsets iPxSample by iPxOffset and clamps each axis only on the side the offset points to (0 or iTextureSize - 1).
+// Assumes iPxSample is already in range and iPxOffset is a compile time constant.
+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) {
+    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;
+    if (iPxOffset.x < 0) { iPxClamped.x = ffxMax(iPxClamped.x, 0); }
+    if (iPxOffset.x > 0) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - 1); }
+    if (iPxOffset.y < 0) { iPxClamped.y = ffxMax(iPxClamped.y, 0); }
+    if (iPxOffset.y > 0) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - 1); }
+    return iPxClamped;
+}
+#if FFX_HALF
+// FFX_MIN16_I2 overload: offsets iPxSample by iPxOffset and clamps each axis only on the side the offset points to.
+FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) {
+    FFX_MIN16_I2 iPxClamped = iPxSample + iPxOffset;
+    if (iPxOffset.x < FFX_MIN16_I(0)) { iPxClamped.x = ffxMax(iPxClamped.x, FFX_MIN16_I(0)); }
+    if (iPxOffset.x > FFX_MIN16_I(0)) { iPxClamped.x = ffxMin(iPxClamped.x, iTextureSize.x - FFX_MIN16_I(1)); }
+    if (iPxOffset.y < FFX_MIN16_I(0)) { iPxClamped.y = ffxMax(iPxClamped.y, FFX_MIN16_I(0)); }
+    if (iPxOffset.y > FFX_MIN16_I(0)) { iPxClamped.y = ffxMin(iPxClamped.y, iTextureSize.y - FFX_MIN16_I(1)); }
+    return iPxClamped;
+}
+#endif //FFX_HALF
+
+
+#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)               \
+    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                          \
+    { /* Generated function: loads the 4x4 footprint around iPxSample through LoadTexture. */           \
+        SampleType Samples;                                                                             \
+        /* Row Y = 0: texels at iPxSample + (-1..+2, -1), each coordinate passed through ClampCoord. */ \
+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize)));    \
+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize)));    \
+        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize)));    \
+        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize)));    \
+        /* Row Y = 1: texels at iPxSample + (-1..+2, +0); fColor11 is iPxSample itself. */              \
+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize)));    \
+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));    \
+        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));    \
+        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize)));    \
+        /* Row Y = 2: texels at iPxSample + (-1..+2, +1). */                                            \
+        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize)));    \
+        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));    \
+        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));    \
+        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize)));    \
+        /* Row Y = 3: texels at iPxSample + (-1..+2, +2). */                                            \
+        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize)));    \
+        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize)));    \
+        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize)));    \
+        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize)));    \
+        /* All 16 members of Samples have been assigned at this point. */                               \
+        return Samples;                                                                                 \
+    }
+// Instantiates the 4x4 bicubic sample fetcher `Name` with FetchedBicubicSamples, FfxFloat32x4 texels and FfxInt32x2 coordinates.
+#define DeclareCustomFetchBicubicSamples(Name, LoadTexture)                                             \
+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+// Same fetcher instantiated with FetchedBicubicSamplesMin16 and FFX_MIN16_F4 texels.
+#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture)                                        \
+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+// Declares `SampleType Name(iPxSample, iTextureSize)`: loads the 2x2 texel quad at offsets (0,0)..(+1,+1) from iPxSample through LoadTexture, passing every coordinate through ClampCoord.
+#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture)  \
+    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                          \
+    {                                                                                                   \
+        SampleType Samples;                                                                             \
+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));           \
+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));           \
+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));           \
+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));           \
+        return Samples;                                                                                 \
+    }
+// Instantiates the 2x2 bilinear sample fetcher `Name` with FetchedBilinearSamples, FfxFloat32x4 texels and FfxInt32x2 coordinates.
+#define DeclareCustomFetchBilinearSamples(Name, LoadTexture)                                             \
+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+// Same fetcher instantiated with FetchedBilinearSamplesMin16 and FFX_MIN16_F4 texels.
+#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture)                                        \
+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+// Declares `FfxFloat32x4 Name(fUvSample, iTextureSize)`: maps the UV to texel space and returns InterpolateSamples(FetchSamples(texel, size), fraction).
+// BE CAREFUL: there are some precision issues, e.g. (3253, 125) leading to (3252.9989778, 125.001102)
+// is common, so iPxSample can "jitter"
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                           \
+    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {                                                                                                                \
+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
+        /* Clamp base coords */                                                                                      \
+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
+        /* Split into integer texel index and fractional offset */                                                   \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
+        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                  \
+        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
+    }
+// Declares `FFX_MIN16_F4 Name(fUvSample, iTextureSize)`: texel-space math stays in FfxFloat32x2, only the fraction and the result are converted to Min16 types.
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                      \
+    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {                                                                                                                \
+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
+        /* Clamp base coords */                                                                                      \
+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
+        /* Split into integer texel index and fractional offset */                                                   \
+        FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample));                                                         \
+        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                    \
+        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
+    }
+// Two-level token pasting: FFX_FSR3UPSCALER_CONCAT expands its arguments before FFX_FSR3UPSCALER_CONCAT_ID pastes them.
+#define FFX_FSR3UPSCALER_CONCAT_ID(x, y) x ## y
+#define FFX_FSR3UPSCALER_CONCAT(x, y) FFX_FSR3UPSCALER_CONCAT_ID(x, y)
+#define FFX_FSR3UPSCALER_SAMPLER_1D_0 Lanczos2
+#define FFX_FSR3UPSCALER_SAMPLER_1D_1 Lanczos2LUT
+#define FFX_FSR3UPSCALER_SAMPLER_1D_2 Lanczos2Approx
+// Maps x = 0 / 1 / 2 to the identifier Lanczos2 / Lanczos2LUT / Lanczos2Approx via the table above.
+#define FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(x) FFX_FSR3UPSCALER_CONCAT(FFX_FSR3UPSCALER_SAMPLER_1D_, x)
+
+#endif //!defined( FFX_FSR3UPSCALER_SAMPLE_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta
new file mode 100644
index 00000000..1a463763
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_sample.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: dcb900c9deecd06419a8a4c10c305890
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h
new file mode 100644
index 00000000..2d446bbb
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h
@@ -0,0 +1,250 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define USE_YCOCG 1 // non-zero: colors are converted with RGBToYCoCg before they are compared
+
+#define fAutogenEpsilon 0.01f // per-channel color-delta threshold; also the divisor floor in ComputeAutoTC_02
+
+// EXPERIMENTAL
+
+// Binary transparency/composition estimate for one pixel.
+// Compares the magnitude of the alpha layer's contribution (|post-alpha - pre-alpha|) in the current frame with
+// the same quantity from the previous frame; returns 1 when the summed per-channel difference (saturated)
+// is at least TcThreshold(), otherwise 0.
+// uDispatchThreadId: pixel read from the current-frame textures; iPrevIdx: pixel read from the previous-frame copies.
+FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{
+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
+
+#if USE_YCOCG
+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
+#endif
+
+    // Magnitude of the alpha contribution in the current and in the previous frame.
+    FfxFloat32x3 alphaMagnitudeCurr = abs(colorPostAlpha - colorPreAlpha);
+    FfxFloat32x3 alphaMagnitudePrev = abs(colorPrevPostAlpha - colorPrevPreAlpha);
+
+    // Sum of the per-channel differences between both magnitudes, saturated.
+    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(alphaMagnitudeCurr - alphaMagnitudePrev), FfxFloat32x3(1, 1, 1))));
+
+    // cleanup very small values
+    retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f);
+
+    return retVal;
+}
+
+// works ok: thin edges
+// Transparency/composition estimate in [0, 1]: the largest per-channel ratio of the frame-to-frame post-alpha change
+// to the frame-to-frame pre-alpha change, scaled by the length of the post-alpha change.
+// uDispatchThreadId: pixel read from the current-frame textures; iPrevIdx: pixel read from the previous-frame copies.
+FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{
+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
+
+#if USE_YCOCG
+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
+#endif
+
+    // A pixel has (had) alpha when post-alpha and pre-alpha differ by more than fAutogenEpsilon in any channel.
+    FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha;
+    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
+    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
+    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon)));
+
+    // Frame-to-frame change of the pre-alpha color (N) and of the post-alpha color (NminusNA).
+    FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;
+    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;
+
+    // Ratio of the two changes with the divisor floored at fAutogenEpsilon; zero when neither frame has alpha.
+    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0);
+    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );
+
+    // only pixels that have significantly changed in color should be considered
+    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) );
+
+    return retVal;
+}
+
+// Computes the TransparencyAndComposition mask for one pixel.
+// The mask marks pixels that should discard locks and apply color clamping.
+//
+// This is typically the case for translucent pixels (which do not write depth values) and for pixels whose motion
+// vectors cannot be guaranteed (e.g. procedural movement, or vegetation without MVs to reduce rasterization cost).
+// Large color changes caused by changed lighting should be marked as well, to remove locks on pixels with "old" lighting.
+//
+// Inputs are an opaque-only texture and a final texture, together with internal copies of both from the last frame.
+// The color difference between the opaque-only and the final image identifies the pixels that use transparency.
+// The previous-frame copies are used to detect where the use of transparency changed.
+// Pixels whose opaque-only color changed significantly (e.g. lighting or texture animation) are marked too.
+//
+// Storing the current textures into the internal copies for the next frame is not done in this function.
+
+FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{
+    const FFX_MIN16_F fThinEdgeMask = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);
+
+    // [branch] ComputeAutoTC_01 replaces the result only where ComputeAutoTC_02 reported more than 0.01
+    if (fThinEdgeMask > FFX_MIN16_F(0.01f))
+    {
+        return ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
+    }
+    return fThinEdgeMask;
+}
+
+float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
+{
+    // Length of the opaque-only color change per tap of the 3x3 neighborhood, row-major (index 4 is the center).
+    float fChange[9];
+    for (int dy = -1; dy <= 1; ++dy)
+    {
+        for (int dx = -1; dx <= 1; ++dx)
+        {
+            const FfxFloat32x3 fCurrent  = LoadOpaqueOnly(curPos + FFX_MIN16_I2(dx, dy)).rgb;
+            const FfxFloat32x3 fPrevious = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb;
+            fChange[(dy + 1) * 3 + (dx + 1)] = length(fCurrent - fPrevious);
+        }
+    }
+
+    //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
+    //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
+    //return sqrt(gradX * gradX + gradY * gradY);
+
+    // Products of the left/right and of the up/down differences against the center tap.
+    const float fHorizontal = abs(fChange[3] - fChange[4]) * abs(fChange[5] - fChange[4]);
+    const float fVertical   = abs(fChange[1] - fChange[4]) * abs(fChange[7] - fChange[4]);
+
+    return sqrt(sqrt(fHorizontal * fVertical));
+}
+
+float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
+{
+    // Length of the change in |input - opaque-only| per tap of the 3x3 neighborhood, row-major (index 4 is the center).
+    float fChange[9];
+    for (int dy = -1; dy <= 1; ++dy)
+    {
+        for (int dx = -1; dx <= 1; ++dx)
+        {
+            const FfxFloat32x3 fCurrent  = abs(LoadInputColor(curPos + FFX_MIN16_I2(dx, dy)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(dx, dy)).rgb);
+            const FfxFloat32x3 fPrevious = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(dx, dy)).rgb);
+            fChange[(dy + 1) * 3 + (dx + 1)] = length(fCurrent - fPrevious);
+        }
+    }
+
+    //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
+    //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
+    //return sqrt(gradX * gradX + gradY * gradY);
+
+    // Products of the left/right and of the up/down differences against the center tap.
+    const float fHorizontal = abs(fChange[3] - fChange[4]) * abs(fChange[5] - fChange[4]);
+    const float fVertical   = abs(fChange[1] - fChange[4]) * abs(fChange[7] - fChange[4]);
+
+    return sqrt(sqrt(fHorizontal * fVertical));
+}
+
+// Estimates how much of the current 3x3 color neighborhood falls outside the previous frame's color bounds.
+// Builds the per-channel min/max of the previous post-alpha color over the 3x3 taps around iPrevIdx, then counts
+// how many channel values of the current input color over the 3x3 taps around uDispatchThreadId lie outside
+// those bounds. Returns that count divided by 27 (9 taps x 3 channels), i.e. a value in [0, 1].
+// uDispatchThreadId indexes the current input color; iPrevIdx indexes the previous-frame post-alpha copy.
+FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{
+    // Only the two 3x3 neighborhoods are read; no center-pixel motion vector or pre/post-alpha colors
+    // are needed for the result.
+
+    // Bounds start at +/-1000 so that taps within that range tighten them. The sentinels are built with the
+    // same FfxFloat32x3 type as the variables they initialize.
+    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f);
+    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f);
+    for (int prevY = -1; prevY < 2; ++prevY)
+    {
+        for (int prevX = -1; prevX < 2; ++prevX)
+        {
+            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(prevX, prevY));
+
+#if USE_YCOCG
+            W = RGBToYCoCg(W);
+#endif
+            minPrev = min(minPrev, W);
+            maxPrev = max(maxPrev, W);
+        }
+    }
+
+    // instead of computing the overlap: simply count how many samples are outside
+    // set reactive based on that
+    FFX_MIN16_F count = FFX_MIN16_F(0.f);
+    // Loop counters use names distinct from the first loop nest so that nothing is declared twice in this body.
+    for (int curY = -1; curY < 2; ++curY)
+    {
+        for (int curX = -1; curX < 2; ++curX)
+        {
+            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(curX, curY));
+
+#if USE_YCOCG
+            Y = RGBToYCoCg(Y);
+#endif
+            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+        }
+    }
+
+    // 9 taps x 3 channels = 27 comparisons in total.
+    FFX_MIN16_F retVal = count / FFX_MIN16_F(27.f);
+
+    return retVal;
+}
+
+
+// Computes the Reactive mask for one pixel.
+// Pixels are marked where the alpha portion of the frame changes a lot between neighbours.
+// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
+// As a result history would not be trustworthy.
+// Pixels where pre-alpha has a large difference should not be marked, since those would profit from accumulation.
+// For mirrors we may assume the pre-alpha is pretty uniform color.
+// uDispatchThreadId / iPrevIdx are forwarded unchanged to computeAlphaEdge and computeSolidEdge.
+//
+// This works well generally, but also marks edge pixels
+FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{
+    // we only get here if alpha has a significant contribution and has changed since last frame.
+
+    // mark pixels with huge variance in alpha as reactive
+    const FFX_MIN16_F fAlphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
+    const FFX_MIN16_F fOpaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));
+
+    // the above also marks edge pixels due to jitter, so those would need to be cancelled out;
+    // no cancellation beyond subtracting the opaque edge response is performed here.
+
+    return ffxSaturate(fAlphaEdge - fOpaqueEdge);
+}
+
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta
new file mode 100644
index 00000000..99d54e78
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_tcr_autogen.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: f01d5a8fbd1f34a4ea8d971755a21b6c
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h
new file mode 100644
index 00000000..47e7ccf5
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h
@@ -0,0 +1,195 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+#ifndef FFX_FSR3UPSCALER_UPSAMPLE_H
+#define FFX_FSR3UPSCALER_UPSAMPLE_H
+
+FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; // size of the fSamples array in ComputeUpsampledColorAndWeight (indexed col + row * 4)
+// Clamps fColor in place to the [aabbMin, aabbMax] range stored in clippingBox.
+void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor)
+{
+    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
+}
+#if FFX_HALF // overload taking the Min16 rectification box and color types
+void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor)
+{
+    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
+}
+#endif
+// Selects the Lanczos evaluation used by GetUpsampleLanczosWeight: 0 = reference, 1 = LUT, 2 = approximate.
+#ifndef FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
+#define FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate
+#endif
+// Returns the kernel weight for a sample at fSrcSampleOffset, evaluated with the selected Lanczos variant.
+FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight)
+{
+    // Both offset components are scaled by the same kernel weight before the kernel is evaluated.
+    const FfxFloat32x2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;
+#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+    return Lanczos2(length(fScaledOffset));
+#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+    return Lanczos2_UseLUT(length(fScaledOffset));
+#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
+#else
+#error "Invalid Lanczos type"
+#endif
+}
+
+#if FFX_HALF
+// Min16 overload: returns the kernel weight for a sample at fSrcSampleOffset, evaluated with the selected
+// Lanczos variant after scaling both offset components by fKernelWeight.
+FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight)
+{
+    const FFX_MIN16_F2 fScaledOffset = fSrcSampleOffset * fKernelWeight.xx;
+#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+    return Lanczos2(length(fScaledOffset));
+#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+    return Lanczos2_UseLUT(length(fScaledOffset));
+#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+    // To Test: Save reciprocal sqrt compute
+    // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fScaledOffset, fScaledOffset));
+    return Lanczos2ApproxSq(dot(fScaledOffset, fScaledOffset));
+#else
+#error "Invalid Lanczos type"
+#endif
+}
+#endif
+
+FfxFloat32 ComputeMaxKernelWeight()
+{
+    // 1 + (1 / DownscaleFactor().x - 1) * bias, capped at 1.99.
+    const FfxFloat32 fKernelSizeBias = 1.0f;
+    const FfxFloat32 fInverseScaleMinusOne = (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x;
+    return ffxMin(FfxFloat32(1.99f), FfxFloat32(1) + fInverseScaleMinusOne * FfxFloat32(fKernelSizeBias));
+}
+// Upsamples the prepared input color at params.iPxHrPos from a 3x3 source neighborhood; returns (color.xyz, weight) and fills clippingBox from the same samples.
+FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
+    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
+{
+    #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr3upscaler_force16_begin.h"
+    #endif
+    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
+    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);      // Destination resolution output pixel center position
+    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();                   // Source resolution output pixel center position
+    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos));                     // TODO: what about weird upscale factors...
+
+    #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr3upscaler_force16_end.h"
+    #endif
+
+    FfxFloat32x3 fSamples[iLanczos2SampleCount];
+
+    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0
+
+    FfxInt32x2 offsetTL;
+    offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
+    offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);
+
+    //Load samples
+    // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3].
+    // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox.
+    // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values.
+    const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
+    const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;
+
+    FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);
+    // Only rows/cols 0..2 of fSamples are written and later read (index = col + row * 4).
+    FFX_UNROLL
+    for (FfxInt32 row = 0; row < 3; row++) {
+
+        FFX_UNROLL
+            for (FfxInt32 col = 0; col < 3; col++) {
+                FfxInt32 iSampleIndex = col + (row << 2);
+
+                FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
+                FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;
+
+                const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));
+
+                fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
+            }
+    }
+
+    FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);
+
+    // Identify how much of each upsampled color to be used for this frame
+    const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample));
+    const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);
+
+    const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
+    const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
+    const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor);
+
+    const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f));
+
+    FFX_UNROLL
+    for (FfxInt32 row = 0; row < 3; row++) {
+        FFX_UNROLL
+        for (FfxInt32 col = 0; col < 3; col++) {
+            FfxInt32 iSampleIndex = col + (row << 2);
+
+            const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
+            const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
+            FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
+
+            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;
+
+            const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize())));
+            FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
+
+            fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);
+
+            // Update rectification box
+            {
+                const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
+                const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq);
+
+                const FfxBoolean bInitialSample = (row == 0) && (col == 0);
+                RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
+            }
+        }
+    }
+
+    RectificationBoxComputeVarianceBoxData(clippingBox);
+
+    fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR3UPSCALER_EPSILON);
+
+    if (fColorAndWeight.w > FSR3UPSCALER_EPSILON) {
+        // Normalize for deringing (we need to compare colors)
+        fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
+        fColorAndWeight.w *= fUpsampleLanczosWeightScale;
+
+        Deringing(clippingBox, fColorAndWeight.xyz);
+    }
+    // NOTE(review): force16_end.h is included a second time below without a second force16_begin.h - confirm this is intended.
+    #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr3upscaler_force16_end.h"
+    #endif
+
+    return fColorAndWeight;
+}
+
+#endif //!defined( FFX_FSR3UPSCALER_UPSAMPLE_H )
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta
new file mode 100644
index 00000000..e4153a0d
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/ffx_fsr3upscaler_upsample.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 3e7832c4a9154414f9eaa125acfe6cd5
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1.meta
new file mode 100644
index 00000000..731c94fd
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 09438bc445e66204f970dc99ca8dae5a
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h
new file mode 100644
index 00000000..e780995f
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h
@@ -0,0 +1,1252 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// @defgroup FfxGPUFsr1 FidelityFX FSR1
+/// FidelityFX Super Resolution 1 GPU documentation
+/// 
+/// @ingroup FfxGPUEffects
+
+/// Setup required constant values for EASU (works on CPU or GPU). All values are float bit patterns stored in uint vectors.
+///
+/// @param [out] con0                                   xy: input viewport / output size scale; zw: 0.5 * scale - 0.5 (output pixel to viewport position).
+/// @param [out] con1                                   xy: reciprocal of the input resource size; zw: (+1/sizeX, -1/sizeY) offset from upper-left of 'F' to gather (0).
+/// @param [out] con2                                   xy: offset from gather (0) to gather (1); zw: offset from gather (0) to gather (2).
+/// @param [out] con3                                   xy: offset from gather (0) to gather (3); zw: set to 0.
+/// @param [in] inputViewportInPixelsX                  The rendered image resolution being upscaled in X dimension.
+/// @param [in] inputViewportInPixelsY                  The rendered image resolution being upscaled in Y dimension.
+/// @param [in] inputSizeInPixelsX                      The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension.
+/// @param [in] inputSizeInPixelsY                      The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension.
+/// @param [in] outputSizeInPixelsX                     The display resolution which the input image gets upscaled to in X dimension.
+/// @param [in] outputSizeInPixelsY                     The display resolution which the input image gets upscaled to in Y dimension.
+/// 
+/// @ingroup FfxGPUFsr1
+FFX_STATIC void ffxFsrPopulateEasuConstants(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY)
+{
+    // Output integer position to a pixel position in viewport.
+    con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX));
+    con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY));
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5));
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5));
+
+    // Viewport pixel position to normalized image space.
+    // This is used to get upper-left of 'F' tap.
+    con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX));
+    con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY));
+
+    // Centers of gather4, first offset from upper-left of 'F'.
+    //      +---+---+
+    //      |   |   |
+    //      +--(0)--+
+    //      | b | c |
+    //  +---F---+---+---+
+    //  | e | f | g | h |
+    //  +--(1)--+--(2)--+
+    //  | i | j | k | l |
+    //  +---+---+---+---+
+    //      | n | o |
+    //      +--(3)--+
+    //      |   |   |
+    //      +---+---+
+    con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));
+    con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY));
+
+    // These are from (0) instead of 'F': con2.xy -> (1), con2.zw -> (2), con3.xy -> (3).
+    con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX));
+    con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));
+    con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));
+    con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));
+    con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX));
+    con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY));
+    con3[2] = con3[3] = 0;
+}
+
+/// Setup required constant values for EASU when the input image sits at an offset inside its resource (works on CPU or GPU).
+///
+/// @param [out] con0                               As written by <c><i>ffxFsrPopulateEasuConstants</i></c>, except zw also has the input offset added.
+/// @param [out] con1                               As written by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [out] con2                               As written by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [out] con3                               As written by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in] inputViewportInPixelsX              The resolution of the input in the X dimension.
+/// @param [in] inputViewportInPixelsY              The resolution of the input in the Y dimension.
+/// @param [in] inputSizeInPixelsX                  The input size in pixels in the X dimension.
+/// @param [in] inputSizeInPixelsY                  The input size in pixels in the Y dimension.
+/// @param [in] outputSizeInPixelsX                 The output size in pixels in the X dimension.
+/// @param [in] outputSizeInPixelsY                 The output size in pixels in the Y dimension.
+/// @param [in] inputOffsetInPixelsX                The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
+/// @param [in] inputOffsetInPixelsY                The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
+///
+/// @ingroup FfxGPUFsr1
+FFX_STATIC void ffxFsrPopulateEasuConstantsOffset(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY)
+{
+    ffxFsrPopulateEasuConstants(
+        con0,
+        con1,
+        con2,
+        con3,
+        inputViewportInPixelsX,
+        inputViewportInPixelsY,
+        inputSizeInPixelsX,
+        inputSizeInPixelsY,
+        outputSizeInPixelsX,
+        outputSizeInPixelsY);
+
+    // Override con0.zw: same half-pixel term as the base function, plus the input offset in pixels.
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX);
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY);
+}
+
+#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p);
+
+// Filters one tap (scalar 32-bit float path): computes the tap's kernel weight and adds it to the running color/weight sums.
+void fsrEasuTapFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor,   // Accumulated color, with negative lobe.
+    FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight,    // Accumulated weight.
+    FFX_PARAMETER_IN FfxFloat32x2 pixelOffset,           // Pixel offset from resolve position to tap.
+    FFX_PARAMETER_IN FfxFloat32x2 gradientDirection,     // Gradient direction.
+    FFX_PARAMETER_IN FfxFloat32x2 length,                // Per-axis scale applied to the rotated offset (anisotropy).
+    FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength,    // Negative lobe strength.
+    FFX_PARAMETER_IN FfxFloat32 clippingPoint,           // Upper clamp applied to distance^2.
+    FFX_PARAMETER_IN FfxFloat32x3 color)                 // Tap color.
+{
+    // Rotate offset by direction.
+    FfxFloat32x2 rotatedOffset;
+    rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y);
+    rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x);
+
+    // Anisotropy.
+    rotatedOffset *= length;
+
+    // Compute distance^2.
+    FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y;
+
+    // Limit to the window as at corner, 2 taps can easily be outside.
+    distanceSquared = ffxMin(distanceSquared, clippingPoint);
+
+    // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+    //  |_______________________________________|   |_______________|
+    //                   base                             window
+    // The general form of the 'base' is,
+    //  (a*(b*x^2-1)^2-(a-1))
+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+    FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0);  // 'base' term
+    FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0);   // 'window' term, with the lobe strength in place of 1/4
+    weightB *= weightB;
+    weightA *= weightA;
+    weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0));
+    FfxFloat32 weight = weightB * weightA;
+
+    // Do weighted average.
+    accumulatedColor += color * weight;
+    accumulatedWeight += weight;
+}
+
+// Accumulate direction and length for one of the four bilinear corners (s, t, u or v), weighted by that corner's bilinear weight.
+void fsrEasuSetFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x2 direction,  // Running sum of the weighted gradient direction.
+    FFX_PARAMETER_INOUT FfxFloat32 length,       // Running sum of the weighted length terms (x and y).
+    FFX_PARAMETER_IN FfxFloat32x2 pp,            // Fractional position used for the bilinear weight.
+    FFX_PARAMETER_IN FfxBoolean biS,             // Select the 's' corner weight.
+    FFX_PARAMETER_IN FfxBoolean biT,             // Select the 't' corner weight.
+    FFX_PARAMETER_IN FfxBoolean biU,             // Select the 'u' corner weight.
+    FFX_PARAMETER_IN FfxBoolean biV,             // Select the 'v' corner weight.
+    FFX_PARAMETER_IN FfxFloat32 lA,              // Value at 'a' of the '+' pattern below.
+    FFX_PARAMETER_IN FfxFloat32 lB,              // Value at 'b'.
+    FFX_PARAMETER_IN FfxFloat32 lC,              // Value at 'c' (center).
+    FFX_PARAMETER_IN FfxFloat32 lD,              // Value at 'd'.
+    FFX_PARAMETER_IN FfxFloat32 lE)              // Value at 'e'.
+{
+    // Compute bilinear weight, branches factor out as predicates are compiler time immediates.
+    //  s t
+    //  u v
+    FfxFloat32 weight = FfxFloat32(0.0);
+    if (biS)
+        weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y);
+    if (biT)
+        weight = pp.x * (FfxFloat32(1.0) - pp.y);
+    if (biU)
+        weight = (FfxFloat32(1.0) - pp.x) * pp.y;
+    if (biV)
+        weight = pp.x * pp.y;
+
+    // Direction is the '+' diff.
+    //    a
+    //  b c d
+    //    e
+    // Then takes magnitude from abs average of both sides of 'c'.
+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+    FfxFloat32 dc = lD - lC;
+    FfxFloat32 cb = lC - lB;
+    FfxFloat32 lengthX = max(abs(dc), abs(cb));
+    lengthX = ffxApproximateReciprocal(lengthX);
+    FfxFloat32 directionX = lD - lB;
+    direction.x += directionX * weight;
+    lengthX = ffxSaturate(abs(directionX) * lengthX);
+    lengthX *= lengthX;
+    length += lengthX * weight;
+
+    // Repeat for the y axis.
+    FfxFloat32 ec = lE - lC;
+    FfxFloat32 ca = lC - lA;
+    FfxFloat32 lengthY = max(abs(ec), abs(ca));
+    lengthY = ffxApproximateReciprocal(lengthY);
+    FfxFloat32 directionY = lE - lA;
+    direction.y += directionY * weight;
+    lengthY = ffxSaturate(abs(directionY) * lengthY);
+    lengthY *= lengthY;
+    length += lengthY * weight;
+}
+
+/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations.
+///
+/// @param [out] pix                    The computed color of a pixel.
+/// @param [in]  ip                     Integer pixel position within the output.
+/// @param [in]  con0                   The first constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con1                   The second constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con2                   The third constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con3                   The fourth constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// 
+/// @ingroup FfxGPUFsr1
+void ffxFsrEasuFloat(
+    FFX_PARAMETER_OUT FfxFloat32x3 pix,
+    FFX_PARAMETER_IN FfxUInt32x2 ip,
+    FFX_PARAMETER_IN FfxUInt32x4 con0,
+    FFX_PARAMETER_IN FfxUInt32x4 con1,
+    FFX_PARAMETER_IN FfxUInt32x4 con2,
+    FFX_PARAMETER_IN FfxUInt32x4 con3)
+{
+    // Get position of 'f'; afterwards 'fp' is the integer part and 'pp' the fractional part.
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;
+
+    // 12-tap kernel.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // Gather 4 ordering.
+    //  a b
+    //  r g
+    // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+    //    a b    <- unused (z)
+    //    r g
+    //  a b a b
+    //  r g r g
+    //    a b
+    //    r g    <- unused (z)
+    // Allowing dead-code removal to remove the 'z's.
+    FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+
+    // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
+    FfxFloat32x4 bczzR = FsrEasuRF(p0);
+    FfxFloat32x4 bczzG = FsrEasuGF(p0);
+    FfxFloat32x4 bczzB = FsrEasuBF(p0);
+    FfxFloat32x4 ijfeR = FsrEasuRF(p1);
+    FfxFloat32x4 ijfeG = FsrEasuGF(p1);
+    FfxFloat32x4 ijfeB = FsrEasuBF(p1);
+    FfxFloat32x4 klhgR = FsrEasuRF(p2);
+    FfxFloat32x4 klhgG = FsrEasuGF(p2);
+    FfxFloat32x4 klhgB = FsrEasuBF(p2);
+    FfxFloat32x4 zzonR = FsrEasuRF(p3);
+    FfxFloat32x4 zzonG = FsrEasuGF(p3);
+    FfxFloat32x4 zzonB = FsrEasuBF(p3);
+
+    // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+    FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG);
+    FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG);
+    FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG);
+    FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG);
+
+    // Rename: one scalar luma per tap letter, following the gather component order in the variable names.
+    FfxFloat32 bL = bczzL.x;
+    FfxFloat32 cL = bczzL.y;
+    FfxFloat32 iL = ijfeL.x;
+    FfxFloat32 jL = ijfeL.y;
+    FfxFloat32 fL = ijfeL.z;
+    FfxFloat32 eL = ijfeL.w;
+    FfxFloat32 kL = klhgL.x;
+    FfxFloat32 lL = klhgL.y;
+    FfxFloat32 hL = klhgL.z;
+    FfxFloat32 gL = klhgL.w;
+    FfxFloat32 oL = zzonL.z;
+    FfxFloat32 nL = zzonL.w;
+
+    // Accumulate for bilinear interpolation: one call per corner (s, t, u, v), centered on f, g, j and k respectively.
+    FfxFloat32x2 dir = ffxBroadcast2(0.0);
+    FfxFloat32  len = FfxFloat32(0.0);
+    fsrEasuSetFloat(dir, len, pp, FFX_TRUE,  FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE,  FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE,  FFX_FALSE, fL, iL, jL, kL, nL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE,  gL, jL, kL, lL, oL);
+
+    // Normalize with approximation, and cleanup close to zero.
+    FfxFloat32x2 dir2 = dir * dir;
+    FfxFloat32 dirR = dir2.x + dir2.y;
+    FfxBoolean zro  = dirR < FfxFloat32(1.0 / 32768.0);
+    dirR = ffxApproximateReciprocalSquareRoot(dirR);
+    dirR = zro ? FfxFloat32(1.0) : dirR;    // Near-zero gradient: skip the normalization scale...
+    dir.x = zro ? FfxFloat32(1.0) : dir.x;  // ...and force dir.x to 1.
+    dir *= ffxBroadcast2(dirR);
+
+    // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+    len = len * FfxFloat32(0.5);
+    len *= len;
+
+    // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+    FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y)));
+
+    // Anisotropic length after rotation,
+    //  x := 1.0 lerp to 'stretch' on edges
+    //  y := 1.0 lerp to 2x on edges
+    FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len);
+
+    // Based on the amount of 'edge',
+    // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+    FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len;
+
+    // Set distance^2 clipping point to the end of the adjustable window.
+    FfxFloat32 clp = ffxApproximateReciprocal(lob);
+
+    // Accumulation mixed with min/max of 4 nearest (f, g, j, k).
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    FfxFloat32x3 min4 =
+        ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)),
+               FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+    FfxFloat32x3 max4 =
+        max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+
+    // Accumulation over all 12 taps.
+    FfxFloat32x3 aC = ffxBroadcast3(0.0);
+    FfxFloat32  aW = FfxFloat32(0.0);
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x));  // b
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y));  // c
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x));  // i
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y));   // j
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z));   // f
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w));  // e
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));   // k
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y));   // l
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z));   // h
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w));   // g
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z));   // o
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w));   // n
+
+    // Normalize and dering: divide by the summed weight, then clamp to the min/max of the 4 nearest taps.
+    pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));
+}
+#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p);
+
+// This runs 2 taps in parallel (FP16 path): same weight math as fsrEasuTapFloat, one tap per vector lane.
+void FsrEasuTapH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCR,  // Accumulated red, one partial sum per lane.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCG,  // Accumulated green, one partial sum per lane.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCB,  // Accumulated blue, one partial sum per lane.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aW,   // Accumulated weight, one partial sum per lane.
+    FFX_PARAMETER_IN FfxFloat16x2 offX,    // X offsets of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 offY,    // Y offsets of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 dir,     // Gradient direction (shared by both taps).
+    FFX_PARAMETER_IN FfxFloat16x2 len,     // Per-axis scale applied to the rotated offsets.
+    FFX_PARAMETER_IN FfxFloat16 lob,       // Negative lobe strength.
+    FFX_PARAMETER_IN FfxFloat16 clp,       // Upper clamp applied to distance^2.
+    FFX_PARAMETER_IN FfxFloat16x2 cR,      // Red of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 cG,      // Green of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 cB)      // Blue of the two taps.
+{
+    FfxFloat16x2 vX, vY;
+    vX = offX * dir.xx + offY * dir.yy;     // Rotate offsets by direction.
+    vY = offX * (-dir.yy) + offY * dir.xx;
+    vX *= len.x;                            // Anisotropy.
+    vY *= len.y;
+    FfxFloat16x2 d2 = vX * vX + vY * vY;    // distance^2 per tap.
+    d2              = min(d2, FFX_BROADCAST_FLOAT16X2(clp));
+    FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    wB *= wB;
+    wA *= wA;
+    wB             = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0));
+    FfxFloat16x2 w = wB * wA;
+    aCR += cR * w;
+    aCG += cG * w;
+    aCB += cB * w;
+    aW += w;
+}
+
+// This runs 2 taps in parallel (FP16 path): accumulates direction and length for a pair of bilinear corners (s,t or u,v), one per lane.
+void FsrEasuSetH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 dirPX,   // Per-lane partial sums of the x direction.
+    FFX_PARAMETER_INOUT FfxFloat16x2  dirPY,  // Per-lane partial sums of the y direction.
+    FFX_PARAMETER_INOUT FfxFloat16x2 lenP,    // Per-lane partial sums of the length terms.
+    FFX_PARAMETER_IN FfxFloat16x2 pp,         // Fractional position used for the bilinear weights.
+    FFX_PARAMETER_IN FfxBoolean biST,         // Use the top-row weights (scaled by 1 - pp.y).
+    FFX_PARAMETER_IN FfxBoolean biUV,         // Use the bottom-row weights (scaled by pp.y).
+    FFX_PARAMETER_IN FfxFloat16x2 lA,
+    FFX_PARAMETER_IN FfxFloat16x2 lB,
+    FFX_PARAMETER_IN FfxFloat16x2 lC,
+    FFX_PARAMETER_IN FfxFloat16x2 lD,
+    FFX_PARAMETER_IN FfxFloat16x2 lE)
+{
+    FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0);
+    
+    if (biST)  // w = (1 - pp.x, pp.x) * (1 - pp.y)
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y);
+
+    if (biUV)  // w = (1 - pp.x, pp.x) * pp.y
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y);
+
+    // ABS is not free in the packed FP16 path.
+    FfxFloat16x2 dc   = lD - lC;
+    FfxFloat16x2 cb   = lC - lB;
+    FfxFloat16x2 lenX = max(abs(dc), abs(cb));
+    lenX              = ffxReciprocalHalf(lenX);
+
+    FfxFloat16x2 dirX = lD - lB;
+    dirPX += dirX * w;
+    lenX = FfxFloat16x2(ffxSaturate(abs(dirX) * lenX));
+    lenX *= lenX;
+    lenP += lenX * w;
+    FfxFloat16x2 ec   = lE - lC;  // Same steps for the y axis.
+    FfxFloat16x2 ca   = lC - lA;
+    FfxFloat16x2 lenY = max(abs(ec), abs(ca));
+    lenY              = ffxReciprocalHalf(lenY);
+    FfxFloat16x2 dirY = lE - lA;
+    dirPY += dirY * w;
+    lenY = FfxFloat16x2(ffxSaturate(abs(dirY) * lenY));
+    lenY *= lenY;
+    lenP += lenY * w;
+}
+
+void FsrEasuH(  // FP16 variant of ffxFsrEasuFloat: same 12-tap kernel, processed two taps at a time.
+    FFX_PARAMETER_OUT FfxFloat16x3 pix,  // The computed color of a pixel.
+    FFX_PARAMETER_IN FfxUInt32x2 ip,     // Integer pixel position within the output.
+    FFX_PARAMETER_IN FfxUInt32x4 con0,   // con0..con3: constants read via ffxAsFloat, same layout as the float path.
+    FFX_PARAMETER_IN FfxUInt32x4 con1,
+    FFX_PARAMETER_IN FfxUInt32x4 con2,
+    FFX_PARAMETER_IN FfxUInt32x4 con3)
+{
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;
+    FfxFloat16x2 ppp = FfxFloat16x2(pp);  // Fractional position, converted to FP16 for the kernel math.
+
+    FfxFloat32x2 p0    = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
+    FfxFloat16x4 bczzR = FsrEasuRH(p0);
+    FfxFloat16x4 bczzG = FsrEasuGH(p0);
+    FfxFloat16x4 bczzB = FsrEasuBH(p0);
+    FfxFloat16x4 ijfeR = FsrEasuRH(p1);
+    FfxFloat16x4 ijfeG = FsrEasuGH(p1);
+    FfxFloat16x4 ijfeB = FsrEasuBH(p1);
+    FfxFloat16x4 klhgR = FsrEasuRH(p2);
+    FfxFloat16x4 klhgG = FsrEasuGH(p2);
+    FfxFloat16x4 klhgB = FsrEasuBH(p2);
+    FfxFloat16x4 zzonR = FsrEasuRH(p3);
+    FfxFloat16x4 zzonG = FsrEasuGH(p3);
+    FfxFloat16x4 zzonB = FsrEasuBH(p3);
+
+    FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG);
+    FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG);
+    FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG);
+    FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG);
+    FfxFloat16   bL    = bczzL.x;
+    FfxFloat16   cL    = bczzL.y;
+    FfxFloat16   iL    = ijfeL.x;
+    FfxFloat16   jL    = ijfeL.y;
+    FfxFloat16   fL    = ijfeL.z;
+    FfxFloat16   eL    = ijfeL.w;
+    FfxFloat16   kL    = klhgL.x;
+    FfxFloat16   lL    = klhgL.y;
+    FfxFloat16   hL    = klhgL.z;
+    FfxFloat16   gL    = klhgL.w;
+    FfxFloat16   oL    = zzonL.z;
+    FfxFloat16   nL    = zzonL.w;
+
+    // This part is different, accumulating 2 taps in parallel.
+    FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 lenP  = FFX_BROADCAST_FLOAT16X2(0.0);
+    FsrEasuSetH(dirPX,
+                dirPY,
+                lenP,
+                ppp,
+                FfxBoolean(true),
+                FfxBoolean(false),
+                FfxFloat16x2(bL, cL),
+                FfxFloat16x2(eL, fL),
+                FfxFloat16x2(fL, gL),
+                FfxFloat16x2(gL, hL),
+                FfxFloat16x2(jL, kL));
+    FsrEasuSetH(dirPX,
+                dirPY,
+                lenP,
+                ppp,
+                FfxBoolean(false),
+                FfxBoolean(true),
+                FfxFloat16x2(fL, gL),
+                FfxFloat16x2(iL, jL),
+                FfxFloat16x2(jL, kL),
+                FfxFloat16x2(kL, lL),
+                FfxFloat16x2(nL, oL));
+    FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g);  // Sum the two lanes of each partial result.
+    FfxFloat16   len = lenP.r + lenP.g;
+
+    FfxFloat16x2 dir2 = dir * dir;
+    FfxFloat16   dirR = dir2.x + dir2.y;
+    FfxUInt32    zro  = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0));  // 1 when the squared gradient length is below 1/32768.
+    dirR              = ffxApproximateReciprocalSquareRootHalf(dirR);
+    dirR              = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR;
+    dir.x             = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dir.x;
+    dir *= FFX_BROADCAST_FLOAT16X2(dirR);
+    len = len * FFX_BROADCAST_FLOAT16(0.5);
+    len *= len;
+    FfxFloat16   stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y)));
+    FfxFloat16x2 len2 =
+        FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len);
+    FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len;
+    FfxFloat16 clp = ffxApproximateReciprocalHalf(lob);
+
+    // FP16 is different, using packed trick to do min and max in same operation: x holds max(-v) = -min(v), y holds max(v).
+    FfxFloat16x2 bothR =
+        max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x)));
+    FfxFloat16x2 bothG =
+        max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x)));
+    FfxFloat16x2 bothB =
+        max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x)));
+
+    // This part is different for FP16, working pairs of taps at a time.
+    FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0);
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy);  // b, c
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy);  // i, j
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw);  // f, e
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy);  // k, l
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw);  // h, g
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw);  // o, n
+    FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y);
+    FfxFloat16   aW = pW.x + pW.y;
+
+    // Slightly different for FP16 version due to combined min and max: negate the x lane to recover the minimum.
+    pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW))));
+}
+#endif // #if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                      FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
+// RCAS sharper does not support scaling, as it should be applied after EASU scaling.
+// Pass EASU output straight into RCAS, no color conversions necessary.
+//------------------------------------------------------------------------------------------------------------------------------
+// RCAS is based on the following logic.
+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
+//    w                n
+//  w 1 w  for taps  w m e 
+//    w                s
+// Where 'w' is the negative lobe weight.
+//  output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation) in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+//       0.25
+//  0.25  -1  0.25
+//       0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+//  GLSL example for the required callbacks :
+// 
+//  FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));}
+//  void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b)
+//  {
+//    //do any simple input color conversions here or leave empty if none needed
+//  }
+//  
+//  FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+//  Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+// 
+//  FfxUInt32x4 con;
+//  FsrRcasCon(con,
+//   0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Call to setup required constant values (works on CPU or GPU).
+ FFX_STATIC void FsrRcasCon(FfxUInt32x4 con,  // Receives the four constant words written below. NOTE(review): no out/inout qualifier is visible here -- confirm the writes reach the caller on the GPU path.
+                            // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+                            FfxFloat32 sharpness)
+ {
+     // Transform from stops to linear value: each stop halves the multiplier (0 -> 1.0, 1 -> 0.5, ...).
+     sharpness = exp2(-sharpness);
+     FfxFloat32x2 hSharp  = {sharpness, sharpness};
+     con[0] = ffxAsUInt32(sharpness);  // Read back through ffxAsFloat(con.x) by FsrRcasF().
+     con[1] = packHalf2x16(hSharp);    // Read back through FFX_UINT32_TO_FLOAT16X2(con.y) by FsrRcasH() and FsrRcasHx2().
+     con[2] = 0;                       // Words 2 and 3 are zeroed; the RCAS filters in this section do not read them.
+     con[3] = 0;
+ }
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by calling shader
+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p);
+ void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasF(out FfxFloat32 pixR,  // Sharpened red; outputs are scalars so porting between FsrRcasF() and FsrRcasH() is easy.
+               out FfxFloat32 pixG,  // Sharpened green.
+               out FfxFloat32 pixB,  // Sharpened blue.
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+               out FfxFloat32 pixA,  // Alpha of the centre tap, passed through unmodified.
+#endif
+               FfxUInt32x2 ip,  // Integer pixel position in output.
+               FfxUInt32x4 con)
+ {  // 'con' is the constant block generated by FsrRcasCon(); only con.x is read here.
+     // Algorithm uses minimal 3x3 pixel neighborhood (only the cross taps are loaded).
+     //    b
+     //  d e f
+     //    h
+     FfxInt32x2   sp = FfxInt32x2(ip);
+     FfxFloat32x3 b  = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
+     FfxFloat32x3 d  = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+     FfxFloat32x4 ee = FsrRcasLoadF(sp);
+     FfxFloat32x3 e  = ee.rgb;
+     pixA            = ee.a;
+#else
+     FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
+#endif
+     FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
+     FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
+     // Rename (32-bit) or regroup (16-bit): split every tap into scalar channels.
+     FfxFloat32 bR = b.r;
+     FfxFloat32 bG = b.g;
+     FfxFloat32 bB = b.b;
+     FfxFloat32 dR = d.r;
+     FfxFloat32 dG = d.g;
+     FfxFloat32 dB = d.b;
+     FfxFloat32 eR = e.r;
+     FfxFloat32 eG = e.g;
+     FfxFloat32 eB = e.b;
+     FfxFloat32 fR = f.r;
+     FfxFloat32 fG = f.g;
+     FfxFloat32 fB = f.b;
+     FfxFloat32 hR = h.r;
+     FfxFloat32 hG = h.g;
+     FfxFloat32 hB = h.b;
+     // Run optional input transform (callback supplied by the calling shader, applied per tap).
+     FsrRcasInputF(bR, bG, bB);
+     FsrRcasInputF(dR, dG, dB);
+     FsrRcasInputF(eR, eG, eB);
+     FsrRcasInputF(fR, fG, fB);
+     FsrRcasInputF(hR, hG, hB);
+     // Luma times 2, i.e. 0.5*R + G + 0.5*B.
+     FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
+     FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
+     FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
+     FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
+     FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
+     // Noise detection: ring-average luma minus centre luma, normalized by the luma range of all five taps, then remapped to {0.5 to 1}.
+     FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
+     nz            = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
+     nz            = FfxFloat32(-0.5) * nz + FfxFloat32(1.0);
+     // Min and max of ring (the four outer taps; the centre tap is excluded).
+     FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
+     FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
+     FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
+     FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
+     FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
+     FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
+     // Immediate constants for peak range: x = 1.0 (upper signal limit), y = -4.0.
+     FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
+     // Limiters, these need to be high precision RCPs.
+     FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
+     FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
+     FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
+     FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
+     FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
+     FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+     FfxFloat32 lobeR   = max(-hitMinR, hitMaxR);
+     FfxFloat32 lobeG   = max(-hitMinG, hitMaxG);
+     FfxFloat32 lobeB   = max(-hitMinB, hitMaxB);
+     FfxFloat32 lobe    = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
+     (con.x);  // Largest channel lobe, clamped to {-FSR_RCAS_LIMIT to 0}, scaled by the 32-bit sharpness in con.x.
+ // Apply noise removal (compiled in only when FSR_RCAS_DENOISE is defined).
+#ifdef FSR_RCAS_DENOISE
+     lobe *= nz;
+#endif
+     // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+     FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
+     pixR            = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+     pixG            = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+     pixB            = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+     return;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                  NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by calling shader
+ FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p);
+ void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasH(
+ out FfxFloat16 pixR, // Sharpened red; outputs are scalars so porting between FsrRcasF() and FsrRcasH() is easy.
+ out FfxFloat16 pixG, // Sharpened green.
+ out FfxFloat16 pixB, // Sharpened blue.
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out FfxFloat16 pixA, // Alpha of the centre tap, passed through unmodified.
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constants generated by FsrRcasCon(); only con.y is read here.
+  // Sharpening algorithm uses minimal 3x3 pixel neighborhood (only the cross taps are loaded).
+  //    b
+  //  d e f
+  //    h
+  FfxInt16x2 sp=FfxInt16x2(ip);
+  FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   FfxFloat16x4 ee=FsrRcasLoadH(sp);
+   FfxFloat16x3 e=ee.rgb;pixA=ee.a;
+  #else
+   FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
+  #endif
+  FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit): split every tap into scalar channels.
+  FfxFloat16 bR=b.r;
+  FfxFloat16 bG=b.g;
+  FfxFloat16 bB=b.b;
+  FfxFloat16 dR=d.r;
+  FfxFloat16 dG=d.g;
+  FfxFloat16 dB=d.b;
+  FfxFloat16 eR=e.r;
+  FfxFloat16 eG=e.g;
+  FfxFloat16 eB=e.b;
+  FfxFloat16 fR=f.r;
+  FfxFloat16 fG=f.g;
+  FfxFloat16 fB=f.b;
+  FfxFloat16 hR=h.r;
+  FfxFloat16 hG=h.g;
+  FfxFloat16 hB=h.b;
+  // Run optional input transform (callback supplied by the calling shader, applied per tap).
+  FsrRcasInputH(bR,bG,bB);
+  FsrRcasInputH(dR,dG,dB);
+  FsrRcasInputH(eR,eG,eB);
+  FsrRcasInputH(fR,fG,fB);
+  FsrRcasInputH(hR,hG,hB);
+  // Luma times 2, i.e. 0.5*R + G + 0.5*B.
+  FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG);
+  FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG);
+  FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG);
+  FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG);
+  FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG);
+  // Noise detection: ring-average luma minus centre luma, normalized by the luma range of all five taps, then remapped to {0.5 to 1}.
+  FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL;
+  nz=FfxFloat16(ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL))));
+  nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0);
+  // Min and max of ring (the four outer taps; the centre tap is excluded).
+  FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+  FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+  FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+  FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+  FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+  FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+  // Immediate constants for peak range: x = 1.0 (upper signal limit), y = -4.0.
+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
+  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
+  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+  FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
+  FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
+  FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
+  FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
+  FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
+  FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
+  FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x;
+  // Apply noise removal (compiled in only when FSR_RCAS_DENOISE is defined).
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader
+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p);
+ void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Can be used to convert from packed Structures of Arrays to Arrays of Structures for store.
+ void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB){
+     pix0.rgb = FfxFloat16x3(pixR.x, pixG.x, pixB.x);  // Lane x of every channel forms the first pixel.
+     pix1.rgb = FfxFloat16x3(pixR.y, pixG.y, pixB.y);  // Lane y of every channel forms the second pixel.
+#ifdef FFX_HLSL
+     pix0.a = pix1.a = 0.0;  // Slower path for DX only, since it won't allow uninitialized values.
+#endif
+ }
+//------------------------------------------------------------------------------------------------------------------------------
+ void FsrRcasHx2(
+ // Output values are for 2 8x8 tiles in a 16x8 region.
+ //  pix<R,G,B>.x =  left 8x8 tile
+ //  pix<R,G,B>.y = right 8x8 tile
+ // This enables later processing to easily be packed as well.
+ out FfxFloat16x2 pixR,
+ out FfxFloat16x2 pixG,
+ out FfxFloat16x2 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out FfxFloat16x2 pixA, // Alpha of the two centre taps, passed through unmodified.
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constants generated by FsrRcasCon(); only con.y is read here.
+  // Sharpening uses a minimal 3x3 pixel neighborhood, loaded once at 'ip' (suffix 0) and once 8 columns to the right (suffix 1).
+  FfxInt16x2 sp0=FfxInt16x2(ip);
+  FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
+   FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
+  #else
+   FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
+  #endif
+  FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
+  FfxInt16x2 sp1=sp0+FfxInt16x2(8,0);
+  FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
+   FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
+  #else
+   FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
+  #endif
+  FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
+  // Arrays of Structures to Structures of Arrays conversion: lane x = pixel 0, lane y = pixel 1.
+  FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
+  FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
+  FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
+  FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
+  FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
+  FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
+  FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
+  FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
+  FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
+  FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
+  FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
+  FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
+  FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
+  FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
+  FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
+  // Run optional input transform (callback supplied by the calling shader, applied per tap).
+  FsrRcasInputHx2(bR,bG,bB);
+  FsrRcasInputHx2(dR,dG,dB);
+  FsrRcasInputHx2(eR,eG,eB);
+  FsrRcasInputHx2(fR,fG,fB);
+  FsrRcasInputHx2(hR,hG,hB);
+  // Luma times 2, i.e. 0.5*R + G + 0.5*B.
+  FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG);
+  FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG);
+  FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG);
+  FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG);
+  FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG);
+  // Noise detection: ring-average luma minus centre luma, normalized by the luma range of all five taps, then remapped to {0.5 to 1}.
+  FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL;
+  nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+  nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0);
+  // Min and max of ring (the four outer taps; the centre tap is excluded).
+  FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+  FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+  FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+  FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+  FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+  FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+  // Immediate constants for peak range: x = 1.0 (upper signal limit), y = -4.0.
+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
+  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
+  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+  FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
+  FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
+  FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
+  FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
+  FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
+  FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
+  FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x);
+  // Apply noise removal (compiled in only when FSR_RCAS_DENOISE is defined).
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+//   FsrLfga*(
+//    color, // In/out linear colorspace color {0 to 1} ranged.
+//    grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+//    amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ // Maximum grain is the minimum distance to the signal limit.
+ void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a) {
+     FfxFloat32x3 headroom = ffxMin(ffxBroadcast3(1.0) - c, c);  // Per-channel distance to the nearer signal limit.
+     c += (t * ffxBroadcast3(a)) * headroom;                     // Grain 't' scaled by amount 'a' and by that headroom.
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+ // Half precision version (slower).
+ void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a) {
+     FfxFloat16x3 headroom = min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c);  // Per-channel distance to the nearer signal limit.
+     c += (t * FFX_BROADCAST_FLOAT16X3(a)) * headroom;                  // Grain 't' scaled by amount 'a' and by that headroom.
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Packed half precision version (faster).
+ void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a){ // Same limit-aware grain as FsrLfgaH(), one packed pair per channel.
+  FfxFloat16x2 amt=FFX_BROADCAST_FLOAT16X2(a);FfxFloat16x2 one=FFX_BROADCAST_FLOAT16X2(1.0);cR+=(tR*amt)*min(one-cR,cR);cG+=(tG*amt)*min(one-cG,cG);cB+=(tB*amt)*min(one-cB,cB);}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+//  FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ void FsrSrtmF(inout FfxFloat32x3 c) {
+     FfxFloat32 peak = ffxMax3(c.r, c.g, c.b);  // One scalar scale for all channels keeps the RGB ratio intact.
+     c *= ffxBroadcast3(rcp(peak + FfxFloat32(1.0)));
+ }
+ // The extra max solves the c=1.0 case (which is a /0).
+ void FsrSrtmInvF(inout FfxFloat32x3 c){FfxFloat32 room=max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b));c*=ffxBroadcast3(rcp(room));} // 'room' is floored at 1/32768 so the reciprocal stays finite.
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU )&& FFX_HALF == 1
+ void FsrSrtmH(inout FfxFloat16x3 c) {
+     FfxFloat16 peak = ffxMax3Half(c.r, c.g, c.b);  // One scalar scale for all channels keeps the RGB ratio intact.
+     c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16(1.0)));
+ }
+ void FsrSrtmInvH(inout FfxFloat16x3 c) {
+     FfxFloat16 room = max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - ffxMax3Half(c.r, c.g, c.b));  // Floored so the reciprocal stays finite.
+     c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(room));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB) {
+     FfxFloat16x2 peak  = ffxMax3Half(cR, cG, cB);  // Largest channel, per packed lane.
+     FfxFloat16x2 scale = ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16X2(1.0));
+     cR *= scale;
+     cG *= scale;
+     cB *= scale;
+ }
+ void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB){
+     FfxFloat16x2 room  = max(FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0), FFX_BROADCAST_FLOAT16X2(1.0) - ffxMax3Half(cR, cG, cB));  // Floored so the reciprocal stays finite.
+     FfxFloat16x2 scale = ffxReciprocalHalf(room);
+     cR *= scale;
+     cG *= scale;
+     cB *= scale;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                       FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
+// Given good non-biased temporal blue noise as dither input,
+// the output dither will temporally conserve energy.
+// This is done by choosing the linear nearest step point instead of perceptual nearest.
+// See code below for details.
+//------------------------------------------------------------------------------------------------------------------------------
+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
+// ===============================================
+// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'.
+// - Thus rounding is to nearest.
+// - NaN gets converted to zero.
+// - INF is clamped to {0.0 to 1.0}.
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
+ // Only 32-bit has enough precision for this computation.
+ // Output is {0 to <1}.
+ FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     // The 1.61803 golden ratio; scales the horizontal term.
+     FfxFloat32 golden  = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+     // Number designed to provide a good visual pattern; scales the vertical term.
+     FfxFloat32 rowStep = FfxFloat32(1.0 / 3.69);
+     // 'f' is added to the column in integer space before the conversion to float.
+     FfxFloat32 col     = FfxFloat32(p.x + f);
+     FfxFloat32 row     = FfxFloat32(p.y);
+     return ffxFract(col * golden + (row * rowStep));
+ }
+  //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 8-bit gamma 2.0.
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+     FfxFloat32x3 codeStep = ffxBroadcast3(1.0 / 255.0);  // One 8-bit code step.
+     // Lower gamma 2.0 code: square root, then snap down onto the 8-bit grid.
+     FfxFloat32x3 lo    = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * codeStep;
+     FfxFloat32x3 hi    = lo + codeStep;
+     // Squaring converts both neighbouring codes back to linear.
+     FfxFloat32x3 linLo = lo * lo;
+     FfxFloat32x3 linHi = hi * hi;
+     // Ratio of 'linLo' to 'linHi' required to produce 'c'.
+     // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges); the medium variant is an IADD,FMA,MUL.
+     FfxFloat32x3 ratio = (c - linHi) * ffxApproximateReciprocalMedium(linLo - linHi);
+     // Use the ratio as a cutoff to choose the lower or upper code; ffxIsGreaterThanZero() is a MUL.
+     c = ffxSaturate(lo + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * codeStep);
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 10-bit gamma 2.0.
+ // The 'c' input is {0 to 1}.
+ // Output is {0 to 1} ready for image store.
+ void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+     FfxFloat32x3 codeStep = ffxBroadcast3(1.0 / 1023.0);  // One 10-bit code step.
+     FfxFloat32x3 lo    = floor(ffxSqrt(c) * ffxBroadcast3(1023.0)) * codeStep;  // Lower gamma 2.0 code.
+     FfxFloat32x3 hi    = lo + codeStep;                                         // Upper gamma 2.0 code.
+     FfxFloat32x3 linLo = lo * lo;
+     FfxFloat32x3 linHi = hi * hi;
+     FfxFloat32x3 ratio = (c - linHi) * ffxApproximateReciprocalMedium(linLo - linHi);
+     c                  = ffxSaturate(lo + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * codeStep);
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+ FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     FfxFloat32 golden  = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+     FfxFloat32 rowStep = FfxFloat32(1.0 / 3.69);
+     FfxFloat32 col     = FfxFloat32(p.x + f);
+     FfxFloat32 row     = FfxFloat32(p.y);
+     // Evaluated in 32-bit float; only the final fraction is narrowed to half.
+     return FfxFloat16(ffxFract(col * golden + (row * rowStep)));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit)
+ {
+     FfxFloat16x3 codeStep = FFX_BROADCAST_FLOAT16X3(1.0 / 255.0);  // One 8-bit code step.
+     FfxFloat16x3 lo    = floor(sqrt(c) * FFX_BROADCAST_FLOAT16X3(255.0)) * codeStep;  // Lower gamma 2.0 code.
+     FfxFloat16x3 hi    = lo + codeStep;                                               // Upper gamma 2.0 code.
+     FfxFloat16x3 linLo = lo * lo;
+     FfxFloat16x3 linHi = hi * hi;
+     FfxFloat16x3 ratio = (c - linHi) * ffxApproximateReciprocalMediumHalf(linLo - linHi);
+     c = FfxFloat16x3(ffxSaturate(lo + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - ratio) * codeStep));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
+ {
+     FfxFloat16x3 codeStep = FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0);  // One 10-bit code step.
+     FfxFloat16x3 lo    = floor(sqrt(c) * FFX_BROADCAST_FLOAT16X3(1023.0)) * codeStep;  // Lower gamma 2.0 code.
+     FfxFloat16x3 hi    = lo + codeStep;                                                // Upper gamma 2.0 code.
+     FfxFloat16x3 linLo = lo * lo;
+     FfxFloat16x3 linHi = hi * hi;
+     FfxFloat16x3 ratio = (c - linHi) * ffxApproximateReciprocalMediumHalf(linLo - linHi);
+     c = FfxFloat16x3(ffxSaturate(lo + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - ratio) * codeStep));
+ }
+ //==============================================================================================================================
+ // This computes dither for positions 'p' and 'p+{8,0}'.
+ FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     // Evaluates the same phase formula as FsrTepdDitH for two x positions 8 apart, sharing one y term.
+     FfxFloat32 goldenRatio = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+     FfxFloat32 yScale      = FfxFloat32(1.0 / 3.69);
+     FfxFloat32 yTerm       = FfxFloat32(p.y) * yScale;
+     FfxFloat32 xBase       = FfxFloat32(p.x + f);
+     FfxFloat32x2 xPair     = FfxFloat32x2(xBase, xBase + FfxFloat32(8.0));
+     FfxFloat32x2 phase     = xPair * ffxBroadcast2(goldenRatio) + ffxBroadcast2(yTerm);
+     return FfxFloat16x2(ffxFract(phase));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+ {
+     // Packed (x2) form of FsrTepdC8H: R, G and B are passed as separate two-component values.
+     FfxFloat16x2 levelCount = FFX_BROADCAST_FLOAT16X2(255.0);
+     FfxFloat16x2 stepSize   = FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+     // Lower quantization level per channel, in square-root space.
+     FfxFloat16x2 lowR = floor(sqrt(cR) * levelCount) * stepSize;
+     FfxFloat16x2 lowG = floor(sqrt(cG) * levelCount) * stepSize;
+     FfxFloat16x2 lowB = floor(sqrt(cB) * levelCount) * stepSize;
+     FfxFloat16x2 lowSqR = lowR * lowR;
+     FfxFloat16x2 lowSqG = lowG * lowG;
+     FfxFloat16x2 lowSqB = lowB * lowB;
+     FfxFloat16x2 highSqR = (lowR + stepSize) * (lowR + stepSize);
+     FfxFloat16x2 highSqG = (lowG + stepSize) * (lowG + stepSize);
+     FfxFloat16x2 highSqB = (lowB + stepSize) * (lowB + stepSize);
+     // (c - high^2) scaled by the approximate reciprocal of (low^2 - high^2).
+     FfxFloat16x2 ratioR = (cR - highSqR) * ffxApproximateReciprocalMediumHalf(lowSqR - highSqR);
+     FfxFloat16x2 ratioG = (cG - highSqG) * ffxApproximateReciprocalMediumHalf(lowSqG - highSqG);
+     FfxFloat16x2 ratioB = (cB - highSqB) * ffxApproximateReciprocalMediumHalf(lowSqB - highSqB);
+     // The (dit - ratio) comparison decides whether one extra step is added to the lower level.
+     cR = FfxFloat16x2(ffxSaturate(lowR + ffxIsGreaterThanZeroHalf(dit - ratioR) * stepSize));
+     cG = FfxFloat16x2(ffxSaturate(lowG + ffxIsGreaterThanZeroHalf(dit - ratioG) * stepSize));
+     cB = FfxFloat16x2(ffxSaturate(lowB + ffxIsGreaterThanZeroHalf(dit - ratioB) * stepSize));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+ {
+     // Packed (x2) form of FsrTepdC10H: same steps as FsrTepdC8Hx2, using 1023 levels instead of 255.
+     FfxFloat16x2 stepSize = FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0);
+     FfxFloat16x2 lowR = floor(sqrt(cR) * FFX_BROADCAST_FLOAT16X2(1023.0)) * stepSize;
+     FfxFloat16x2 lowG = floor(sqrt(cG) * FFX_BROADCAST_FLOAT16X2(1023.0)) * stepSize;
+     FfxFloat16x2 lowB = floor(sqrt(cB) * FFX_BROADCAST_FLOAT16X2(1023.0)) * stepSize;
+     FfxFloat16x2 lowSqR = lowR * lowR;
+     FfxFloat16x2 lowSqG = lowG * lowG;
+     FfxFloat16x2 lowSqB = lowB * lowB;
+     FfxFloat16x2 highSqR = (lowR + stepSize) * (lowR + stepSize);
+     FfxFloat16x2 highSqG = (lowG + stepSize) * (lowG + stepSize);
+     FfxFloat16x2 highSqB = (lowB + stepSize) * (lowB + stepSize);
+     FfxFloat16x2 ratioR = (cR - highSqR) * ffxApproximateReciprocalMediumHalf(lowSqR - highSqR);
+     FfxFloat16x2 ratioG = (cG - highSqG) * ffxApproximateReciprocalMediumHalf(lowSqG - highSqG);
+     FfxFloat16x2 ratioB = (cB - highSqB) * ffxApproximateReciprocalMediumHalf(lowSqB - highSqB);
+     cR = FfxFloat16x2(ffxSaturate(lowR + ffxIsGreaterThanZeroHalf(dit - ratioR) * stepSize));
+     cG = FfxFloat16x2(ffxSaturate(lowG + ffxIsGreaterThanZeroHalf(dit - ratioG) * stepSize));
+     cB = FfxFloat16x2(ffxSaturate(lowB + ffxIsGreaterThanZeroHalf(dit - ratioB) * stepSize));
+ }
+#endif
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta
new file mode 100644
index 00000000..64e4ffaf
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/fsr1/ffx_fsr1.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: 628e23510f46ef44bbf0035ce9a63be0
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd.meta
new file mode 100644
index 00000000..0b775af5
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 0f03de1579ac3294595ae4f40106b7a2
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h
new file mode 100644
index 00000000..6441419b
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h
@@ -0,0 +1,1009 @@
+// This file is part of the FidelityFX SDK.
+// 
+// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+/// @defgroup FfxGPUSpd FidelityFX SPD
+/// FidelityFX Single Pass Downsampler 2.0 GPU documentation
+///
+/// @ingroup FfxGPUEffects
+
+/// Setup required constant values for SPD (CPU).
+///
+/// @param [out] dispatchThreadGroupCountXY         CPU side: dispatch thread group count xy. z is number of slices of the input texture
+/// @param [out] workGroupOffset                    GPU side: pass in as constant
+/// @param [out] numWorkGroupsAndMips               GPU side: pass in as constant
+/// @param [in] rectInfo                            left, top, width, height
+/// @param [in] mips                                optional: if -1, calculate based on rect width and height
+///
+/// @ingroup FfxGPUSpd
+#if defined(FFX_CPU)
+FFX_STATIC void ffxSpdSetup(FfxUInt32x2    dispatchThreadGroupCountXY,
+                         FfxUInt32x2    workGroupOffset,
+                         FfxUInt32x2    numWorkGroupsAndMips,
+                         FfxUInt32x4     rectInfo,
+                         FfxInt32 mips)
+{
+    // Each thread group downsamples one 64x64 tile; rectInfo = (left, top, width, height).
+    const FfxUInt32 tileSize = 64;
+
+    // Tile coordinates of the first and last tile touched by the rect.
+    workGroupOffset[0] = rectInfo[0] / tileSize;
+    workGroupOffset[1] = rectInfo[1] / tileSize;
+    FfxUInt32 lastTileX = (rectInfo[0] + rectInfo[2] - 1) / tileSize;
+    FfxUInt32 lastTileY = (rectInfo[1] + rectInfo[3] - 1) / tileSize;
+
+    // Dispatch exactly one thread group per touched tile.
+    dispatchThreadGroupCountXY[0] = lastTileX + 1 - workGroupOffset[0];
+    dispatchThreadGroupCountXY[1] = lastTileY + 1 - workGroupOffset[1];
+
+    // Thread groups per slice.
+    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]);
+
+    if (mips < 0)
+    {
+        // No explicit mip count: derive it from the larger rect dimension, capped at 12.
+        FfxUInt32 largestSide   = ffxMax(rectInfo[2], rectInfo[3]);
+        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(largestSide))), FfxFloat32(12))));
+    }
+    else
+    {
+        numWorkGroupsAndMips[1] = FfxUInt32(mips);
+    }
+}
+
+/// Setup required constant values for SPD (CPU).
+///
+/// @param [out] dispatchThreadGroupCountXY         CPU side: dispatch thread group count xy. z is number of slices of the input texture
+/// @param [out] workGroupOffset                    GPU side: pass in as constant
+/// @param [out] numWorkGroupsAndMips               GPU side: pass in as constant
+/// @param [in] rectInfo                            left, top, width, height
+///
+/// @ingroup FfxGPUSpd
+FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,
+                         FfxUInt32x2 workGroupOffset,
+                         FfxUInt32x2 numWorkGroupsAndMips,
+                         FfxUInt32x4  rectInfo)
+{ // Convenience overload: forwards to the five-argument ffxSpdSetup.
+    ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); // -1: mip count is computed from the rect size
+}
+#endif // #if defined(FFX_CPU)
+
+
+//==============================================================================================================================
+//                                                     NON-PACKED VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)
+#if defined(FFX_SPD_PACKED_ONLY)
+// Avoid compiler errors by including default implementations of these callbacks.
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
+{ // Placeholder for the FFX_SPD_PACKED_ONLY build: ignores its arguments and returns zero.
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
+{ // Placeholder: ignores its arguments and returns zero.
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
+{ // Placeholder: intentionally writes nothing.
+}
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{ // Placeholder: ignores its arguments and returns zero.
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{ // Placeholder: intentionally writes nothing.
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{ // Placeholder: ignores its inputs and returns zero.
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+#endif // #if defined(FFX_SPD_PACKED_ONLY)
+
+//_____________________________________________________________/\_______________________________________________________________
+
+void ffxSpdWorkgroupShuffleBarrier()
+{ // Barrier used throughout this file between intermediate-storage writes and the reads that follow.
+    FFX_GROUP_MEMORY_BARRIER(); // macro defined elsewhere
+}
+
+// Only last active workgroup should proceed
+bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
+{
+    // Only the first thread of the group bumps the global atomic counter for this slice.
+    if (0 == localInvocationIndex)
+    {
+        SpdIncreaseAtomicCounter(slice);
+    }
+    // Barrier first, then every thread reads the counter; true tells the caller to exit.
+    ffxSpdWorkgroupShuffleBarrier();
+    return ((numWorkGroups - 1) != SpdGetAtomicCounter());
+}
+
+// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
+FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v)
+{ // Combines this lane's value with three values read from its group of four lanes, via SpdReduce4.
+#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS)
+
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat32x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4(v0, v1, v2, v3);
+
+#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS)
+
+    // requires SM6.0
+    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); // lane index with the low two bits cleared: first lane of this group of four
+    FfxFloat32x4     v0   = v; // this lane's own value (not a read of lane 'quad')
+    FfxFloat32x4     v1   = WaveReadLaneAt(v, quad | 1);
+    FfxFloat32x4     v2   = WaveReadLaneAt(v, quad | 2);
+    FfxFloat32x4     v3   = WaveReadLaneAt(v, quad | 3);
+    return SpdReduce4(v0, v1, v2, v3); // NOTE(review): covers all four lanes only on the group's first lane; callers in this file store from index % 4 == 0
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat32x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat32x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4(v0, v1, v2, v3);
+    */
+#endif
+    return v; // fallback when neither wave path above is compiled in: the input is passed through unreduced
+}
+
+FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
+{ // Loads four values from intermediate storage by (x, y) coordinate and folds them with SpdReduce4.
+    FfxFloat32x4 sampleA = SpdLoadIntermediate(i0.x, i0.y);
+    FfxFloat32x4 sampleB = SpdLoadIntermediate(i1.x, i1.y);
+    FfxFloat32x4 sampleC = SpdLoadIntermediate(i2.x, i2.y);
+    FfxFloat32x4 sampleD = SpdLoadIntermediate(i3.x, i3.y);
+    return SpdReduce4(sampleA, sampleB, sampleC, sampleD);
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{ // Loads four values through the SpdLoad callback for the given slice and folds them with SpdReduce4.
+    FfxFloat32x4 sampleA = SpdLoad(FfxInt32x2(i0), slice);
+    FfxFloat32x4 sampleB = SpdLoad(FfxInt32x2(i1), slice);
+    FfxFloat32x4 sampleC = SpdLoad(FfxInt32x2(i2), slice);
+    FfxFloat32x4 sampleD = SpdLoad(FfxInt32x2(i3), slice);
+    return SpdReduce4(sampleA, sampleB, sampleC, sampleD);
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice)
+{ // Reduces the 2x2 footprint at 'base': offsets (0,0), (0,1), (1,0), (1,1), in that argument order.
+    return SpdReduceLoad4(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{ // Loads four values through the SpdLoadSourceImage callback and folds them with SpdReduce4.
+    FfxFloat32x4 texelA = SpdLoadSourceImage(FfxInt32x2(i0), slice);
+    FfxFloat32x4 texelB = SpdLoadSourceImage(FfxInt32x2(i1), slice);
+    FfxFloat32x4 texelC = SpdLoadSourceImage(FfxInt32x2(i2), slice);
+    FfxFloat32x4 texelD = SpdLoadSourceImage(FfxInt32x2(i3), slice);
+    return SpdReduce4(texelA, texelB, texelC, texelD);
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice)
+{ // With SPD_LINEAR_SAMPLER a single callback load is used; otherwise the 2x2 footprint at 'base' is reduced.
+#if !defined(SPD_LINEAR_SAMPLER)
+    return SpdReduceLoadSourceImage4(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+#else
+    return SpdLoadSourceImage(FfxInt32x2(base), slice);
+#endif
+}
+
+void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Wave-operation path: each thread reduces four source footprints into mip index 0, then quad-reduces them into mip index 1.
+    FfxFloat32x4 v[4]; // one value per quadrant handled by this thread
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); // source coordinate inside this group's 64-wide tile
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); // destination coordinate for mip index 0
+    v[0]     = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); // same pattern, shifted 32 source texels in x
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); // shifted 32 source texels in y
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); // shifted in both x and y
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // 'mip' carries the total mip count here (SpdDownsample passes 'mips'); nothing beyond index 0 requested
+        return;
+
+    v[0] = SpdReduceQuad(v[0]);
+    v[1] = SpdReduceQuad(v[1]);
+    v[2] = SpdReduceQuad(v[2]);
+    v[3] = SpdReduceQuad(v[3]);
+
+    if ((localInvocationIndex % 4) == 0) // one thread out of every four writes the reduced values
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2, v[0]); // also kept in intermediate storage at the same local coordinate
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Path without wave operations: same mip index 0 loads/stores, then mip index 1 is reduced through intermediate storage with barriers.
+    FfxFloat32x4 v[4]; // one value per quadrant handled by this thread
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); // source coordinate inside this group's 64-wide tile
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); // destination coordinate for mip index 0
+    v[0]     = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2); // same pattern, shifted 32 source texels in x
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32); // shifted 32 source texels in y
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32); // shifted in both x and y
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // 'mip' carries the total mip count here (SpdDownsample passes 'mips'); nothing beyond index 0 requested
+        return;
+
+    for (FfxUInt32 i = 0; i < 4; i++) // the four quadrant values go through intermediate storage one at a time
+    {
+        SpdStoreIntermediate(x, y, v[i]);
+        ffxSpdWorkgroupShuffleBarrier(); // writes above must complete before the reads below
+        if (localInvocationIndex < 64) // only the first 64 threads reduce, each over a 2x2 block of stored values
+        {
+            v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        ffxSpdWorkgroupShuffleBarrier(); // reads above must complete before the next iteration overwrites the storage
+    }
+
+    if (localInvocationIndex < 64) // write the reduced values back at the four quadrant offsets
+    {
+        SpdStoreIntermediate(x + 0, y + 0, v[0]);
+        SpdStoreIntermediate(x + 8, y + 0, v[1]);
+        SpdStoreIntermediate(x + 0, y + 8, v[2]);
+        SpdStoreIntermediate(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Compile-time dispatch between the two implementations of the first two levels; arguments are forwarded unchanged.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
+#else
+    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
+#endif
+}
+
+
+void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces values held in intermediate storage into level 'mip' (8 texels per work group per axis) and stores the results back.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 64) // only the first 64 threads take part on this path
+    {
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); // odd rows are shifted by one column, matching the pattern drawn above
+    }
+#else
+    FfxFloat32x4 v = SpdLoadIntermediate(x, y);
+    v        = SpdReduceQuad(v);
+    // quad index 0 stores result
+    if (localInvocationIndex % 4 == 0)
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+        SpdStoreIntermediate(x + (y / 2) % 2, y, v); // x offset alternates with (y / 2) % 2
+    }
+#endif
+}
+
+void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces values held in intermediate storage into level 'mip' (4 texels per work group per axis) and stores the results back.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 16) // only the first 16 threads take part on this path
+    {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        FfxFloat32x4 v =
+            SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
+        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        SpdStoreIntermediate(x * 4 + y, y * 4, v); // column is shifted by the row index y, matching the pattern drawn above
+    }
+#else
+    if (localInvocationIndex < 64) // only the first 64 threads take part on this path
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2);
+        v        = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x * 2 + y / 2, y * 2, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces values held in intermediate storage into level 'mip' (2 texels per work group per axis); results go to row 0 of the storage.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 4) // only the first 4 threads take part on this path
+    {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+                                         FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+                                         FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+                                         FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        SpdStoreIntermediate(x + y * 2, 0, v); // the four results are packed side by side in row 0
+    }
+#else
+    if (localInvocationIndex < 16) // only the first 16 threads take part on this path
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4);
+        v        = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x / 2 + y, 0, v); // results are written to row 0
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the four values in row 0 of intermediate storage to a single texel per work group, written at 'workGroupID' in level 'mip'.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 1) // a single thread does the final reduction on this path
+    {
+        // x x x x 0 ...
+        // 0 ...
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+        SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
+    }
+#else
+    if (localInvocationIndex < 4) // the first four threads each load one of the four values
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0);
+        v        = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
+{ // Each thread reduces four 2x2 footprints read via SpdLoad into mip index 6, then combines those four into mip index 7.
+    FfxInt32x2   tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); // load coordinate: top-left 2x2 of this thread's 4x4 region
+    FfxInt32x2   pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); // store coordinate in mip index 6
+    FfxFloat32x4 v0  = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v0, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 2, y * 4 + 0); // right neighbour
+    pix       = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+    FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v1, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 0, y * 4 + 2); // lower neighbour
+    pix       = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+    FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v2, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 2, y * 4 + 2); // diagonal neighbour
+    pix       = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+    FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v3, 6, slice);
+
+    if (mips <= 7) // mip index 7 was not requested
+        return;
+    // no barrier needed, working on values only from the same thread
+
+    FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3);
+    SpdStore(FfxInt32x2(x, y), v, 7, slice);
+    SpdStoreIntermediate(x, y, v); // also placed in intermediate storage at (x, y)
+}
+
+void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
+{ // Runs up to four consecutive levels starting at 'baseMip', with a barrier before each; returns as soon as 'mips' is reached.
+    if (mips <= baseMip)
+        return;
+    ffxSpdWorkgroupShuffleBarrier(); // barrier before the level reads intermediate storage
+    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+    if (mips <= baseMip + 1)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+    if (mips <= baseMip + 2)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+    if (mips <= baseMip + 3)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); // last of the four levels; takes no x/y
+}
+
+/// Downsamples a 64x64 tile based on the work group id.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+///
+/// @param [in] workGroupID             index of the work group / thread group
+/// @param [in] localInvocationIndex    index of the thread within the thread group in 1D
+/// @param [in] mips                    the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups           the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice                   the slice of the input texture
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{
+    // compute MIP level 0 and 1
+    FfxUInt32x2        sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); // remap of the low 6 bits of the index (helper defined elsewhere)
+    FfxUInt32 x      = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); // bit 6 of the index adds 8 in x
+    FfxUInt32 y      = sub_xy.y + 8 * ((localInvocationIndex >> 7)); // bits 7 and up add multiples of 8 in y
+    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+    // compute MIP level 2, 3, 4, 5
+    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+
+    if (mips <= 6) // nothing beyond the per-work-group levels was requested
+        return;
+
+    // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+    // terminate if not, continue if yes.
+    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
+        return;
+
+    // reset the global atomic counter back to 0 for the next spd dispatch
+    SpdResetAtomicCounter(slice);
+
+    // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+    // compute MIP level 6 and 7
+    SpdDownsampleMips_6_7(x, y, mips, slice);
+
+    // compute MIP level 8, 9, 10, 11
+    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); // work group id fixed to (0, 0) for the remaining levels
+}
+/// Downsamples a 64x64 tile based on the work group id and work group offset.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+///
+/// @param [in] workGroupID             index of the work group / thread group
+/// @param [in] localInvocationIndex    index of the thread within the thread group in 1D
+/// @param [in] mips                    the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups           the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice                   the slice of the input texture
+/// @param [in] workGroupOffset         the work group offset. it's (0,0) in case the entire input texture is downsampled.
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{ // Adds 'workGroupOffset' to the work group id and forwards to the five-argument SpdDownsample.
+    SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//==============================================================================================================================
+//                                                       PACKED VERSION
+//==============================================================================================================================
+
+#if FFX_HALF
+
+FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
+{ // Half-precision twin of SpdReduceQuad: combines this lane's value with three values from its group of four lanes, via SpdReduce4H.
+#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4H(v0, v1, v2, v3);
+#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    // requires SM6.0
+    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); // lane index with the low two bits cleared: first lane of this group of four
+    FfxFloat16x4        v0   = v; // this lane's own value (not a read of lane 'quad')
+    FfxFloat16x4        v1   = WaveReadLaneAt(v, quad | 1);
+    FfxFloat16x4        v2   = WaveReadLaneAt(v, quad | 2);
+    FfxFloat16x4        v3   = WaveReadLaneAt(v, quad | 3);
+    return SpdReduce4H(v0, v1, v2, v3);
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat16x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat16x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4H(v0, v1, v2, v3);
+    */
+#endif
+    return v; // fallback when no wave path is compiled in: pass the input through, as SpdReduceQuad does, instead of returning zeros
+}
+
+FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
+{   // Reads the four intermediate values addressed by i0..i3 and combines them with SpdReduce4H.
+    FfxFloat16x4 q0 = SpdLoadIntermediateH(i0.x, i0.y);
+    FfxFloat16x4 q1 = SpdLoadIntermediateH(i1.x, i1.y);
+    FfxFloat16x4 q2 = SpdLoadIntermediateH(i2.x, i2.y);
+    FfxFloat16x4 q3 = SpdLoadIntermediateH(i3.x, i3.y);
+    return SpdReduce4H(q0, q1, q2, q3);
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{   // Loads four values through SpdLoadH at i0..i3 of the given slice and reduces them to a single value.
+    FfxFloat16x4 t0 = SpdLoadH(FfxInt32x2(i0), slice);
+    FfxFloat16x4 t1 = SpdLoadH(FfxInt32x2(i1), slice);
+    FfxFloat16x4 t2 = SpdLoadH(FfxInt32x2(i2), slice);
+    FfxFloat16x4 t3 = SpdLoadH(FfxInt32x2(i3), slice);
+    return SpdReduce4H(t0, t1, t2, t3);
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
+{   // Reduces the 2x2 footprint anchored at base, visited in the order (0,0), (0,1), (1,0), (1,1).
+    return SpdReduceLoad4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{   // Loads four source-image values at i0..i3 of the given slice and reduces them to a single value.
+    FfxFloat16x4 s0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
+    FfxFloat16x4 s1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
+    FfxFloat16x4 s2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
+    FfxFloat16x4 s3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice);
+    return SpdReduce4H(s0, s1, s2, s3);
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
+{   // With SPD_LINEAR_SAMPLER a single load is issued; otherwise the 2x2 footprint at base is loaded and reduced here.
+#if !defined(SPD_LINEAR_SAMPLER)
+    return SpdReduceLoadSourceImage4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+#else
+    return SpdLoadSourceImageH(FfxInt32x2(base), slice);
+#endif
+}
+
+void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{
+    FfxFloat16x4 v[4];
+    // Each thread produces four MIP-index-0 values: source coordinates 32 apart, destination coordinates 16 apart, in x and/or y.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0]     = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+    // With a single requested MIP level the work ends after the stores above.
+    if (mips <= 1)
+        return;
+    // Reduce each of the four values across neighbouring lanes with wave operations (SpdReduceQuadH).
+    v[0] = SpdReduceQuadH(v[0]);
+    v[1] = SpdReduceQuadH(v[1]);
+    v[2] = SpdReduceQuadH(v[2]);
+    v[3] = SpdReduceQuadH(v[3]);
+    // One invocation in four writes the results to MIP index 1 and to intermediate storage for the following pass.
+    if ((localInvocationIndex % 4) == 0)
+    {
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2, v[0]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{
+    FfxFloat16x4 v[4];
+    // Each thread produces four MIP-index-0 values: source coordinates 32 apart, destination coordinates 16 apart, in x and/or y.
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0]     = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+    // With a single requested MIP level the work ends after the stores above.
+    if (mips <= 1)
+        return;
+    // No wave operations: per value, every thread publishes v[i] to intermediate storage, then threads 0..63 reduce 2x2 groups of it.
+    for (FfxUInt32 i = 0; i < 4; i++)
+    {
+        SpdStoreIntermediateH(x, y, v[i]);
+        ffxSpdWorkgroupShuffleBarrier();
+        if (localInvocationIndex < 64)
+        {
+            v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        ffxSpdWorkgroupShuffleBarrier();
+    }
+    // Threads 0..63 then leave their four reduced values in intermediate storage, at offsets 0 and 8 in x and y, for the next pass.
+    if (localInvocationIndex < 64)
+    {
+        SpdStoreIntermediateH(x + 0, y + 0, v[0]);
+        SpdStoreIntermediateH(x + 8, y + 0, v[1]);
+        SpdStoreIntermediateH(x + 0, y + 8, v[2]);
+        SpdStoreIntermediateH(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{   // Compile-time dispatch: the wave-intrinsic path is used unless FFX_SPD_NO_WAVE_OPERATIONS is defined, which selects the LDS path.
+#if !defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
+#else
+    SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
+#endif
+}
+
+
+void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{   // Reduces values held in intermediate storage, writes the result to MIP index `mip` at workGroupID.xy * 8 + offset, and stores it back for the next pass.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v);
+    }
+#else
+    FfxFloat16x4 v = SpdLoadIntermediateH(x, y);
+    v     = SpdReduceQuadH(v);
+    // quad index 0 stores result
+    if (localInvocationIndex % 4 == 0)
+    {
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+        SpdStoreIntermediateH(x + (y / 2) % 2, y, v);
+    }
+#endif
+}
+
+void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{   // Reduces values held in intermediate storage, writes the result to MIP index `mip` at workGroupID.xy * 4 + offset, and stores it back for the next pass.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 16)
+    {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        FfxFloat16x4 v =
+            SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        SpdStoreIntermediateH(x * 4 + y, y * 4, v);
+    }
+#else
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2);
+        v     = SpdReduceQuadH(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{   // Reduces values held in intermediate storage, writes the result to MIP index `mip` at workGroupID.xy * 2 + offset, and packs it into row 0 of the storage.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 4)
+    {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+                                       FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+                                       FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+                                       FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        SpdStoreIntermediateH(x + y * 2, 0, v);
+    }
+#else
+    if (localInvocationIndex < 16)
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4);
+        v     = SpdReduceQuadH(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediateH(x / 2 + y, 0, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{   // Reduces the four values at (0..3, 0) of intermediate storage to one and writes it to MIP index `mip` at workGroupID.xy.
+#if defined(FFX_SPD_NO_WAVE_OPERATIONS)
+    if (localInvocationIndex < 1)
+    {
+        // x x x x 0 ...
+        // 0 ...
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+        SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+    }
+#else
+    if (localInvocationIndex < 4)
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0);
+        v     = SpdReduceQuadH(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
+{   // Per thread: four 2x2 reductions via SpdReduceLoad4H written to MIP index 6, then (if requested) their reduction to MIP index 7.
+    FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0);
+    FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0);
+    FfxFloat16x4  v0  = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v0, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 2, y * 4 + 0);
+    pix    = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+    FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v1, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 0, y * 4 + 2);
+    pix    = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+    FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v2, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 2, y * 4 + 2);
+    pix    = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+    FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v3, 6, slice);
+    // MIP index 7 is only produced when at least 8 levels were requested.
+    if (mips < 8)
+        return;
+    // no barrier needed, working on values only from the same thread
+    // Fold this thread's four MIP-index-6 values into one and also keep it in intermediate storage for the passes that follow.
+    FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3);
+    SpdStoreH(FfxInt32x2(x, y), v, 7, slice);
+    SpdStoreIntermediateH(x, y, v);
+}
+
+void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
+{   // Runs up to four single-level passes (MIP indices baseMip .. baseMip + 3); returns as soon as `mips` is reached, and issues a barrier before each pass.
+    if (mips <= baseMip)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+    if (mips <= baseMip + 1)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+    if (mips <= baseMip + 2)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+    if (mips <= baseMip + 3)
+        return;
+    ffxSpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice);
+}
+
+/// Downsamples a 64x64 tile based on the work group id.
+/// If after downsampling it's the last active thread group, computes the remaining MIP levels.
+/// Uses half types.
+///
+/// @param [in] workGroupID             index of the work group / thread group
+/// @param [in] localInvocationIndex    index of the thread within the thread group in 1D
+/// @param [in] mips                    the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups           the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice                   the slice of the input texture
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{
+    FfxUInt32x2        sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); // remapped 2D position of the thread within its group of 64
+    FfxUInt32 x      = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2);           // odd-numbered groups of 64 threads are shifted by 8 in x
+    FfxUInt32 y      = sub_xy.y + 8 * ((localInvocationIndex >> 7));               // every further block of 128 threads adds 8 to y
+
+    // compute MIP level 0 and 1
+    SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+    // compute MIP level 2, 3, 4, 5
+    SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+    // Fewer than 7 requested levels: everything needed has been produced above.
+    if (mips < 7)
+        return;
+
+    // increase the global atomic counter for the given slice and check if it's the last remaining thread group:
+    // terminate if not, continue if yes.
+    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
+        return;
+
+    // reset the global atomic counter back to 0 for the next spd dispatch
+    SpdResetAtomicCounter(slice);
+
+    // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+    // compute MIP level 6 and 7
+    SpdDownsampleMips_6_7H(x, y, mips, slice);
+
+    // compute MIP level 8, 9, 10, 11
+    SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice);
+}
+
+/// Offset variant of the half-precision downsample: the work group ID is shifted by workGroupOffset
+/// before the 64x64 tile is processed by the five-argument SpdDownsampleH.
+/// Uses half types.
+///
+/// @param [in] workGroupID             index of the work group / thread group
+/// @param [in] localInvocationIndex    index of the thread within the thread group in 1D
+/// @param [in] mips                    the number of total MIP levels to compute for the input texture
+/// @param [in] numWorkGroups           the total number of dispatched work groups / thread groups for this slice
+/// @param [in] slice                   the slice of the input texture
+/// @param [in] workGroupOffset         the work group offset; (0,0) when the entire input texture is downsampled
+///
+/// @ingroup FfxGPUSpd
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{
+    SpdDownsampleH(workGroupOffset + workGroupID, localInvocationIndex, mips, numWorkGroups, slice);
+}
+
+#endif // #if FFX_HALF
+#endif // #if defined(FFX_GPU)
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta
new file mode 100644
index 00000000..8d4e716c
--- /dev/null
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3/shaders/fsr3upscaler/spd/ffx_spd.h.meta
@@ -0,0 +1,27 @@
+fileFormatVersion: 2
+guid: face65176ee3b82498bd0b8fed0ddacd
+PluginImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  iconMap: {}
+  executionOrder: {}
+  defineConstraints: []
+  isPreloaded: 0
+  isOverridable: 0
+  isExplicitlyReferenced: 0
+  validateReferences: 1
+  platformData:
+  - first:
+      Any: 
+    second:
+      enabled: 1
+      settings: {}
+  - first:
+      Editor: Editor
+    second:
+      enabled: 0
+      settings:
+        DefaultValueInitialized: true
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3Upscaler.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3Upscaler.cs
index 58341a82..e5b81547 100644
--- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3Upscaler.cs
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/FSR3Upscaler.cs
@@ -1,8 +1,9 @@
 ﻿using System.Collections.Generic;
+using FidelityFX;
 
 namespace UnityEngine.Rendering.HighDefinition.AMD.FSR3
 {
-    public class FSR3Upscaler: UpscalerPlugin
+    public class FSR3UpscalerPlugin: UpscalerPlugin
     {
         private static FSR3GraphicsDevice sGraphicsDeviceInstance;
         
@@ -35,23 +36,34 @@ namespace UnityEngine.Rendering.HighDefinition.AMD.FSR3
 
     public class FSR3GraphicsDevice : GraphicsDevice
     {
-        private static readonly Stack<FSR3Context> sContextPool = new();
+        private readonly Stack<FSR3Context> _contextPool = new();
+
+        private Fsr3UpscalerAssets _assets;
         
         internal bool Initialize()
         {
-            // TODO
-            return false;
+            // Load the Fsr3UpscalerAssets resource once; later calls reuse the cached reference.
+            if (_assets == null)
+                _assets = Resources.Load<Fsr3UpscalerAssets>("Fsr3UpscalerAssets");
+
+            return _assets != null;
         }
 
         internal void Shutdown()
         {
-            // TODO
+            // Contexts are Reset() (which destroys their FSR3 context) before being pooled, so dropping the references is all that is left to do.
+            _contextPool.Clear();
+
+            if (_assets == null)
+                return;
+            Resources.UnloadAsset(_assets);
+            _assets = null;
         }
         
         public override FSR2Context CreateFeature(CommandBuffer cmd, in FSR2CommandInitializationData initSettings)
         {
-            var context = sContextPool.Count != 0 ? sContextPool.Pop() : new FSR3Context();
-            context.Init(initSettings); // TODO might need some way to distinguish between contexts (see featureSlot)
+            var context = _contextPool.Count == 0 ? new FSR3Context() : _contextPool.Pop(); // reuse a pooled context when one is available
+            context.Init(initSettings, _assets); // TODO: contexts may need to be told apart somehow (see featureSlot)
             return context;
         }
 
@@ -59,7 +71,7 @@ namespace UnityEngine.Rendering.HighDefinition.AMD.FSR3
         {
             var context = (FSR3Context)fsrContext;
             context.Reset();
-            sContextPool.Push(context);
+            _contextPool.Push(context);
         }
 
         public override void ExecuteFSR2(CommandBuffer cmd, FSR2Context fsrContext, in FSR2TextureTable textures)
@@ -69,12 +81,15 @@ namespace UnityEngine.Rendering.HighDefinition.AMD.FSR3
 
         public override bool GetRenderResolutionFromQualityMode(FSR2Quality qualityMode, uint displayWidth, uint displayHeight, out uint renderWidth, out uint renderHeight)
         {
-            throw new System.NotImplementedException();
+            // Delegate to the FSR3 helper, which works in signed ints; convert on the way in and out. Always reports success.
+            Fsr3Upscaler.GetRenderResolutionFromQualityMode(out int width, out int height, (int)displayWidth, (int)displayHeight, (Fsr3Upscaler.QualityMode)qualityMode);
+            (renderWidth, renderHeight) = ((uint)width, (uint)height);
+            return true;
         }
 
         public override float GetUpscaleRatioFromQualityMode(FSR2Quality qualityMode)
         {
-            throw new System.NotImplementedException();
+            return Fsr3Upscaler.GetUpscaleRatioFromQualityMode((Fsr3Upscaler.QualityMode)qualityMode); // NOTE(review): plain cast assumes FSR2Quality and Fsr3Upscaler.QualityMode share numeric values — confirm
         }
     }
 
@@ -86,22 +101,66 @@ namespace UnityEngine.Rendering.HighDefinition.AMD.FSR3
         private FSR2CommandExecutionData _executeData;
         public override ref FSR2CommandExecutionData executeData => ref _executeData;
 
-        internal void Init(in FSR2CommandInitializationData initSettings)
+        private readonly Fsr3UpscalerContext _context = new Fsr3UpscalerContext();
+
+        internal void Init(in FSR2CommandInitializationData initSettings, Fsr3UpscalerAssets assets)
         {
-            // TODO: create internal context data
             _initData = initSettings;
+            if (assets == null) throw new System.ArgumentNullException(nameof(assets), "Fsr3UpscalerAssets must be loaded before an FSR3 context can be created.");
+            Fsr3Upscaler.InitializationFlags flags = 0; // FSR2 initialization flags, translated one by one to their FSR3 equivalents
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableHighDynamicRange)) flags |= Fsr3Upscaler.InitializationFlags.EnableHighDynamicRange;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableDisplayResolutionMotionVectors)) flags |= Fsr3Upscaler.InitializationFlags.EnableDisplayResolutionMotionVectors;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableMotionVectorsJitterCancellation)) flags |= Fsr3Upscaler.InitializationFlags.EnableMotionVectorsJitterCancellation;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.DepthInverted)) flags |= Fsr3Upscaler.InitializationFlags.EnableDepthInverted;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableDepthInfinite)) flags |= Fsr3Upscaler.InitializationFlags.EnableDepthInfinite;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableAutoExposure)) flags |= Fsr3Upscaler.InitializationFlags.EnableAutoExposure;
+            if (initSettings.GetFlag(FfxFsr2InitializationFlags.EnableDynamicResolution)) flags |= Fsr3Upscaler.InitializationFlags.EnableDynamicResolution;
+            // Create the FSR3 upscaler context from the requested sizes, the translated flags and the shader set held by `assets`.
+            _context.Create(new Fsr3Upscaler.ContextDescription
+            {
+                DisplaySize = new Vector2Int((int)initSettings.displaySizeWidth, (int)initSettings.displaySizeHeight),
+                MaxRenderSize = new Vector2Int((int)initSettings.maxRenderSizeWidth, (int)initSettings.maxRenderSizeHeight),
+                Flags = flags,
+                Shaders = assets.shaders,
+            });
         }
 
         internal void Reset()
         {
-            // TODO: destroy internal context data
+            // Tear down the wrapped Fsr3UpscalerContext first, then clear the cached init/execute data below.
+            _context.Destroy();
             _initData = new FSR2CommandInitializationData();
             _executeData = new FSR2CommandExecutionData();
         }
 
         internal void Draw(CommandBuffer cmd, in FSR2TextureTable textures)
         {
+            var renderSize = new Vector2Int((int)_executeData.renderSizeWidth, (int)_executeData.renderSizeHeight); // also used as the input resource size
+            var description = new Fsr3Upscaler.DispatchDescription
+            {
+                Color = new ResourceView(textures.colorInput),
+                Depth = new ResourceView(textures.depth),
+                MotionVectors = new ResourceView(textures.motionVectors),
+                Exposure = new ResourceView(textures.exposureTexture),
+                Reactive = new ResourceView(textures.biasColorMask),
+                TransparencyAndComposition = new ResourceView(textures.transparencyMask),
+                Output = new ResourceView(textures.colorOutput),
+                JitterOffset = new Vector2(_executeData.jitterOffsetX, _executeData.jitterOffsetY),
+                MotionVectorScale = new Vector2(_executeData.MVScaleX, _executeData.MVScaleY),
+                RenderSize = renderSize,
+                InputResourceSize = renderSize,
+                EnableSharpening = _executeData.enableSharpening != 0,
+                Sharpness = _executeData.sharpness,
+                FrameTimeDelta = _executeData.frameTimeDelta / 1000f,
+                PreExposure = _executeData.preExposure,
+                Reset = _executeData.reset != 0,
+                CameraNear = _executeData.cameraNear,
+                CameraFar = _executeData.cameraFar,
+                CameraFovAngleVertical = _executeData.cameraFovAngleVertical,
+                ViewSpaceToMetersFactor = 1.0f, // 1 unit is 1 meter in Unity
+            };
             
+            _context.Dispatch(description, cmd);
         }
     }
 }
diff --git a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/UpscalerPlugin.cs b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/UpscalerPlugin.cs
index 5d0bc15f..ca2d4969 100644
--- a/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/UpscalerPlugin.cs
+++ b/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/RenderPass/Upscalers/UpscalerPlugin.cs
@@ -5,7 +5,8 @@ namespace UnityEngine.Rendering.HighDefinition.AMD
     public static class AMDUnityPlugin
     {
         // TODO: allow dynamic switching between plugins (including shutdown of previous plugin)
-        internal static readonly UpscalerPlugin ActivePlugin = new FSR2Wrapper.FSR2WrapperUpscaler();
+        // internal static readonly UpscalerPlugin ActivePlugin = new FSR2Wrapper.FSR2WrapperUpscaler();
+        internal static readonly UpscalerPlugin ActivePlugin = new FSR3.FSR3UpscalerPlugin();
 
         public static bool Load() => ActivePlugin.Load();
 
@@ -66,7 +67,7 @@ namespace UnityEngine.Rendering.HighDefinition.AMD
 
         public uint maxRenderSizeWidth;
 
-        public bool GetFlag(FfxFsr2InitializationFlags flag)
+        public readonly bool GetFlag(FfxFsr2InitializationFlags flag) // true when every bit of `flag` is set; readonly avoids a defensive copy when called on an `in` argument
         {
             return (ffxFsrFlags & flag) == flag;
         }